//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"

static const MachineMemOperand::Flags MOSuppressPair =
    MachineMemOperand::MOTargetFlag1;

static cl::opt<unsigned>
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));

static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));

AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
      RI(STI.getTargetTriple()), Subtarget(STI) {}

/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  if (MI.getOpcode() == AArch64::INLINEASM)
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

  // FIXME: We currently only handle pseudoinstructions that don't get expanded
  //        before the assembly printer.
  unsigned NumBytes = 0;
  const MCInstrDesc &Desc = MI.getDesc();
  switch (Desc.getOpcode()) {
  default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
    NumBytes = 4;
    break;
  case TargetOpcode::DBG_VALUE:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
    NumBytes = 0;
    break;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    break;
  case AArch64::TLSDESC_CALLSEQ:
    // This gets lowered to an instruction sequence which takes 16 bytes.
    NumBytes = 16;
    break;
  }

  return NumBytes;
}

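// A sketch of the Cond vector encoding produced by parseCondBranch() below
// and consumed by instantiateCondBranch() (inferred from this file's code,
// not a documented contract):
//   Bcc:    { condition code }
//   CB[N]Z: { -1, opcode, register }
//   TB[N]Z: { -1, opcode, register, bit number }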
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  switch (LastInst->getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::CBZW:
  case AArch64::CBZX:
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    Target = LastInst->getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    Target = LastInst->getOperand(2).getMBB();
    Cond.push_back(MachineOperand::CreateImm(-1));
    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    Cond.push_back(LastInst->getOperand(0));
    Cond.push_back(LastInst->getOperand(1));
  }
}

static unsigned getBranchDisplacementBits(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("unexpected opcode!");
  case AArch64::TBNZW:
  case AArch64::TBZW:
  case AArch64::TBNZX:
  case AArch64::TBZX:
    return TBZDisplacementBits;
  case AArch64::CBNZW:
  case AArch64::CBZW:
  case AArch64::CBNZX:
  case AArch64::CBZX:
    return CBZDisplacementBits;
  case AArch64::Bcc:
    return BCCDisplacementBits;
  }
}

static unsigned getBranchMaxDisplacementBytes(unsigned Opc) {
  if (Opc == AArch64::B)
    return -1;

  unsigned Bits = getBranchDisplacementBits(Opc);
  unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;

  // Verify the displacement bits options have sane values.
  // XXX: Is there a better place for this?
  assert(MaxOffs >= 8 &&
         "max branch displacement must be enough to jump "
         "over conditional branch expansion");

  return MaxOffs;
}
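
// Worked example (illustrative): TB[N]Z has a 14-bit signed displacement
// measured in 4-byte words, so Bits = 14 yields
// MaxOffs = ((1 << 13) - 1) << 2 = 32764 bytes, i.e. roughly +/-32 KiB.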

bool AArch64InstrInfo::isBranchInRange(unsigned BranchOp, uint64_t BrOffset,
                                       uint64_t DestOffset) const {
  unsigned MaxOffs = getBranchMaxDisplacementBytes(BranchOp);

  // Branch before the Dest.
  if (BrOffset <= DestOffset)
    return (DestOffset - BrOffset <= MaxOffs);
  return (BrOffset - DestOffset <= MaxOffs);
}

// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}

bool AArch64InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
  } else {
    // Folded compare-and-branch
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown conditional branch!");
    case AArch64::CBZW:
      Cond[1].setImm(AArch64::CBNZW);
      break;
    case AArch64::CBNZW:
      Cond[1].setImm(AArch64::CBZW);
      break;
    case AArch64::CBZX:
      Cond[1].setImm(AArch64::CBNZX);
      break;
    case AArch64::CBNZX:
      Cond[1].setImm(AArch64::CBZX);
      break;
    case AArch64::TBZW:
      Cond[1].setImm(AArch64::TBNZW);
      break;
    case AArch64::TBNZW:
      Cond[1].setImm(AArch64::TBZW);
      break;
    case AArch64::TBZX:
      Cond[1].setImm(AArch64::TBNZX);
      break;
    case AArch64::TBNZX:
      Cond[1].setImm(AArch64::TBZX);
      break;
    }
  }

  return false;
}
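
// Example (illustrative): reversing a folded "cbz w0, bb" yields
// "cbnz w0, bb", while a plain "b.eq bb" becomes "b.ne bb" via
// AArch64CC::getInvertedCondCode().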

unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }
  --I;
  if (!isCondBranchOpcode(I->getOpcode())) {
    if (BytesRemoved)
      *BytesRemoved = 4;
    return 1;
  }

  // Remove the branch.
  I->eraseFromParent();
  if (BytesRemoved)
    *BytesRemoved = 8;

  return 2;
}

void AArch64InstrInfo::instantiateCondBranch(
    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    ArrayRef<MachineOperand> Cond) const {
  if (Cond[0].getImm() != -1) {
    // Regular Bcc
    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
  } else {
    // Folded compare-and-branch
    // Note that we use addOperand instead of addReg to keep the flags.
    const MachineInstrBuilder MIB =
        BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
    if (Cond.size() > 3)
      MIB.addImm(Cond[3].getImm());
    MIB.addMBB(TBB);
  }
}

unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");

  if (!FBB) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    else
      instantiateCondBranch(MBB, DL, TBB, Cond);

    if (BytesAdded)
      *BytesAdded = 4;

    return 1;
  }

  // Two-way conditional branch.
  instantiateCondBranch(MBB, DL, TBB, Cond);
  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);

  if (BytesAdded)
    *BytesAdded = 8;

  return 2;
}
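
// Note (illustrative): every A64 instruction is 4 bytes long, so the
// BytesAdded/BytesRemoved counts reported above are simply 4 times the
// number of branch instructions inserted or erased.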

// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    if (!DefMI->isFullCopy())
      return VReg;
    VReg = DefMI->getOperand(1).getReg();
  }
  return VReg;
}

// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
                                unsigned *NewVReg = nullptr) {
  VReg = removeCopies(MRI, VReg);
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return 0;

  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
  unsigned Opc = 0;
  unsigned SrcOpNum = 0;
  switch (DefMI->getOpcode()) {
  case AArch64::ADDSXri:
  case AArch64::ADDSWri:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to ADDXri and ADDWri.
    LLVM_FALLTHROUGH;
  case AArch64::ADDXri:
  case AArch64::ADDWri:
    // add x, 1 -> csinc.
    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
        DefMI->getOperand(3).getImm() != 0)
      return 0;
    SrcOpNum = 1;
    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    break;

  case AArch64::ORNXrr:
  case AArch64::ORNWrr: {
    // not x -> csinv, represented as orn dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    break;
  }

  case AArch64::SUBSXrr:
  case AArch64::SUBSWrr:
    // if NZCV is used, do not fold.
    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
      return 0;
    // fall-through to SUBXrr and SUBWrr.
    LLVM_FALLTHROUGH;
  case AArch64::SUBXrr:
  case AArch64::SUBWrr: {
    // neg x -> csneg, represented as sub dst, xzr, src.
    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
      return 0;
    SrcOpNum = 2;
    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    break;
  }
  default:
    return 0;
  }
  assert(Opc && SrcOpNum && "Missing parameters");

  if (NewVReg)
    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
  return Opc;
}
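
// Summary of the folds recognized above (a sketch mirroring the switch):
//   add  Wd, Wn, #1   -> csinc (increment)
//   orn  Wd, wzr, Wm  -> csinv (bitwise not)
//   sub  Wd, wzr, Wm  -> csneg (negate)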

bool AArch64InstrInfo::canInsertSelect(
    const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
    int &FalseCycles) const {
  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
  unsigned ExtraCondLat = Cond.size() != 1;

  // GPRs are handled by csel.
  // FIXME: Fold in x+1, -x, and ~x when applicable.
  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    // Single-cycle csel, csinc, csinv, and csneg.
    CondCycles = 1 + ExtraCondLat;
    TrueCycles = FalseCycles = 1;
    if (canFoldIntoCSel(MRI, TrueReg))
      TrueCycles = 0;
    else if (canFoldIntoCSel(MRI, FalseReg))
      FalseCycles = 0;
    return true;
  }

  // Scalar floating point is handled by fcsel.
  // FIXME: Form fabs, fmin, and fmax when applicable.
  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    CondCycles = 5 + ExtraCondLat;
    TrueCycles = FalseCycles = 2;
    return true;
  }

  // Can't do vectors.
  return false;
}

void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, unsigned DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    unsigned TrueReg, unsigned FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = 0;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = 1;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = 0;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = 1;
      CC = AArch64CC::NE;
      break;
    }
    unsigned SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual registers into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
      CC);
}

/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
  uint64_t Imm = MI.getOperand(1).getImm();
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
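
// Example (illustrative): 0x00ff00ff is a valid 32-bit logical immediate
// (0x00ff replicated across 16-bit elements), so a MOVi32imm of it can be
// expanded to "orr wd, wzr, #0x00ff00ff"; 0x12345678 has no such encoding
// and must stay a MOVZ/MOVK sequence.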

// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  if (!Subtarget.hasCustomCheapAsMoveHandling())
    return MI.isAsCheapAsAMove();

  unsigned Imm;

  switch (MI.getOpcode()) {
  default:
    return false;

  // add/sub on register without shift
  case AArch64::ADDWri:
  case AArch64::ADDXri:
  case AArch64::SUBWri:
  case AArch64::SUBXri:
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
            MI.getOperand(3).getImm() == 0);

  // add/sub on register with shift
  case AArch64::ADDWrs:
  case AArch64::ADDXrs:
  case AArch64::SUBWrs:
  case AArch64::SUBXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getArithShiftValue(Imm) < 4);

  // logical ops on immediate
  case AArch64::ANDWri:
  case AArch64::ANDXri:
  case AArch64::EORWri:
  case AArch64::EORXri:
  case AArch64::ORRWri:
  case AArch64::ORRXri:
    return true;

  // logical ops on register without shift
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr:
    return true;

  // logical ops on register with shift
  case AArch64::ANDWrs:
  case AArch64::ANDXrs:
  case AArch64::BICWrs:
  case AArch64::BICXrs:
  case AArch64::EONWrs:
  case AArch64::EONXrs:
  case AArch64::EORWrs:
  case AArch64::EORXrs:
  case AArch64::ORNWrs:
  case AArch64::ORNXrs:
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    Imm = MI.getOperand(3).getImm();
    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
            AArch64_AM::getShiftValue(Imm) < 4 &&
            AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);

  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
  // ORRXri, it is as cheap as MOV
  case AArch64::MOVi32imm:
    return canBeExpandedToORR(MI, 32);
  case AArch64::MOVi64imm:
    return canBeExpandedToORR(MI, 64);

  // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
  // feature.
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
    return Subtarget.hasZeroCycleZeroing();
  case TargetOpcode::COPY:
    return (Subtarget.hasZeroCycleZeroing() &&
            (MI.getOperand(1).getReg() == AArch64::WZR ||
             MI.getOperand(1).getReg() == AArch64::XZR));
  }

  llvm_unreachable("Unknown opcode to check as cheap as a move!");
}

bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::SBFMXri: // aka sxtw
  case AArch64::UBFMXri: // aka uxtw
    // Check for the 32 -> 64 bit extension case, these instructions can do
    // much more.
    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
      return false;
    // This is a signed or unsigned 32 -> 64 bit extension.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = AArch64::sub_32;
    return true;
  }
}

bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
    MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned BaseRegA = 0, BaseRegB = 0;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned WidthA = 0, WidthB = 0;

  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
  // base registers are identical, and the offset of a lower memory access +
  // the width doesn't overlap the offset of a higher memory access,
  // then the memory accesses are different.
  if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
      getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
    if (BaseRegA == BaseRegB) {
      int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
      int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}
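
// Worked example (illustrative): with the same base register,
// "ldr x0, [x19, #8]" (offset 8, width 8) and "str x1, [x19, #16]"
// (offset 16, width 8) satisfy LowOffset + LowWidth <= HighOffset
// (8 + 8 <= 16), so the two accesses are reported as disjoint.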

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      unsigned &SrcReg2, int &CmpMask,
                                      int &CmpValue) const {
  switch (MI.getOpcode()) {
  default:
    break;
  case AArch64::SUBSWrr:
  case AArch64::SUBSWrs:
  case AArch64::SUBSWrx:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXrs:
  case AArch64::SUBSXrx:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWrs:
  case AArch64::ADDSWrx:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXrs:
  case AArch64::ADDSXrx:
    // Replace SUBSWrr with SUBWrr if NZCV is not used.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case AArch64::SUBSWri:
  case AArch64::ADDSWri:
  case AArch64::SUBSXri:
  case AArch64::ADDSXri:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: CmpValue is only ever compared against zero, so it is
    // normalized to 0 or 1 here.
    CmpValue = MI.getOperand(2).getImm() != 0;
    return true;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of the uint64_t will be lost.
    // In fact it caused a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in OptimizeCompareInstr.
    CmpValue = AArch64_AM::decodeLogicalImmediate(
                   MI.getOperand(2).getImm(),
                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
    return true;
  }

  return false;
}
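
// Note (illustrative): "cmp w0, #5" is an alias of "subs wzr, w0, #5", so it
// is matched here as SUBSWri with SrcReg = w0, SrcReg2 = 0, and CmpValue
// normalized to 1 because the immediate is non-zero.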

static bool UpdateOperandRegClass(MachineInstr &Instr) {
  MachineBasicBlock *MBB = Instr.getParent();
  assert(MBB && "Can't get MachineBasicBlock here");
  MachineFunction *MF = MBB->getParent();
  assert(MF && "Can't get MachineFunction here");
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
       ++OpIdx) {
    MachineOperand &MO = Instr.getOperand(OpIdx);
    const TargetRegisterClass *OpRegCstraints =
        Instr.getRegClassConstraint(OpIdx, TII, TRI);

    // If there's no constraint, there's nothing to do.
    if (!OpRegCstraints)
      continue;
    // If the operand is a frame index, there's nothing to do here.
    // A frame index operand will resolve correctly during PEI.
    if (MO.isFI())
      continue;

    assert(MO.isReg() &&
           "Operand has register constraints without being a register!");

    unsigned Reg = MO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (!OpRegCstraints->contains(Reg))
        return false;
    } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
               !MRI->constrainRegClass(Reg, OpRegCstraints))
      return false;
  }

  return true;
}

/// \brief Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertFlagSettingOpcode(const MachineInstr &MI) {
  // Don't convert all compare instructions, because for some the zero register
  // encoding becomes the sp register.
  bool MIDefinesZeroReg = false;
  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
    MIDefinesZeroReg = true;

  switch (MI.getOpcode()) {
  default:
    return MI.getOpcode();
  case AArch64::ADDSWrr:
    return AArch64::ADDWrr;
  case AArch64::ADDSWri:
    return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
  case AArch64::ADDSWrs:
    return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
  case AArch64::ADDSWrx:
    return AArch64::ADDWrx;
  case AArch64::ADDSXrr:
    return AArch64::ADDXrr;
  case AArch64::ADDSXri:
    return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
  case AArch64::ADDSXrs:
    return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
  case AArch64::ADDSXrx:
    return AArch64::ADDXrx;
  case AArch64::SUBSWrr:
    return AArch64::SUBWrr;
  case AArch64::SUBSWri:
    return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
  case AArch64::SUBSWrs:
    return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
  case AArch64::SUBSWrx:
    return AArch64::SUBWrx;
  case AArch64::SUBSXrr:
    return AArch64::SUBXrr;
  case AArch64::SUBSXri:
    return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
  case AArch64::SUBSXrs:
    return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
  case AArch64::SUBSXrx:
    return AArch64::SUBXrx;
  }
}

enum AccessKind {
  AK_Write = 0x01,
  AK_Read = 0x10,
  AK_All = 0x11
};

/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are from different blocks it's assumed the condition
/// flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
  // Early exit if To is at the beginning of the BB.
  if (To == To->getParent()->begin())
    return true;

  // Check whether the instructions are in the same basic block.
  // If not, assume the condition flags might get modified somewhere.
  if (To->getParent() != From->getParent())
    return true;

  // From must be above To.
  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
                      [From](MachineInstr &MI) {
                        return MI.getIterator() == From;
                      }) != To->getParent()->rend());

  // We iterate backward starting from \p To until we hit \p From.
  for (--To; To != From; --To) {
    const MachineInstr &Instr = *To;

    if (((AccessToCheck & AK_Write) &&
         Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
        ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
      return true;
  }
  return false;
}

/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is a true compare instruction
/// only when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if an earlier instruction produces a needed condition
///    code, or can be converted into an instruction that does.
///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
    int CmpValue, const MachineRegisterInfo *MRI) const {
  assert(CmpInstr.getParent());
  assert(MRI);

  // Replace SUBSWrr with SUBWrr if NZCV is not used.
  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
  if (DeadNZCVIdx != -1) {
    if (CmpInstr.definesRegister(AArch64::WZR) ||
        CmpInstr.definesRegister(AArch64::XZR)) {
      CmpInstr.eraseFromParent();
      return true;
    }
    unsigned Opc = CmpInstr.getOpcode();
    unsigned NewOpc = convertFlagSettingOpcode(CmpInstr);
    if (NewOpc == Opc)
      return false;
    const MCInstrDesc &MCID = get(NewOpc);
    CmpInstr.setDesc(MCID);
    CmpInstr.RemoveOperand(DeadNZCVIdx);
    bool succeeded = UpdateOperandRegClass(CmpInstr);
    (void)succeeded;
    assert(succeeded && "Some operands reg class are incompatible!");
    return true;
  }

  // Continue only if we have a "ri" where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in analyzeCompare
  // function.
  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
  if (CmpValue != 0 || SrcReg2 != 0)
    return false;

  // CmpInstr is a Compare instruction if destination register is not used.
  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
    return false;

  return substituteCmpToZero(CmpInstr, SrcReg, MRI);
}

/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64::INSTRUCTION_LIST_END;

  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSWri:
  case AArch64::SUBSXrr:
  case AArch64::SUBSXri:
    return Instr.getOpcode();

  case AArch64::ADDWrr:    return AArch64::ADDSWrr;
  case AArch64::ADDWri:    return AArch64::ADDSWri;
  case AArch64::ADDXrr:    return AArch64::ADDSXrr;
  case AArch64::ADDXri:    return AArch64::ADDSXri;
  case AArch64::ADCWr:     return AArch64::ADCSWr;
  case AArch64::ADCXr:     return AArch64::ADCSXr;
  case AArch64::SUBWrr:    return AArch64::SUBSWrr;
  case AArch64::SUBWri:    return AArch64::SUBSWri;
  case AArch64::SUBXrr:    return AArch64::SUBSXrr;
  case AArch64::SUBXri:    return AArch64::SUBSXri;
  case AArch64::SBCWr:     return AArch64::SBCSWr;
  case AArch64::SBCXr:     return AArch64::SBCSXr;
  case AArch64::ANDWri:    return AArch64::ANDSWri;
  case AArch64::ANDXri:    return AArch64::ANDSXri;
  }
}

/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
  for (auto *BB : MBB->successors())
    if (BB->isLiveIn(AArch64::NZCV))
      return true;
  return false;
}

namespace {
struct UsedNZCV {
  bool N;
  bool Z;
  bool C;
  bool V;
  UsedNZCV() : N(false), Z(false), C(false), V(false) {}
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    this->N |= UsedFlags.N;
    this->Z |= UsedFlags.Z;
    this->C |= UsedFlags.C;
    this->V |= UsedFlags.V;
    return *this;
  }
};
} // end anonymous namespace

/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
  switch (Instr.getOpcode()) {
  default:
    return AArch64CC::Invalid;

  case AArch64::Bcc: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 2);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
  }

  case AArch64::CSINVWr:
  case AArch64::CSINVXr:
  case AArch64::CSINCWr:
  case AArch64::CSINCXr:
  case AArch64::CSELWr:
  case AArch64::CSELXr:
  case AArch64::CSNEGWr:
  case AArch64::CSNEGXr:
  case AArch64::FCSELSrrr:
  case AArch64::FCSELDrrr: {
    int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
    assert(Idx >= 1);
    return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
  }
  }
}

static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
  assert(CC != AArch64CC::Invalid);
  UsedNZCV UsedFlags;
  switch (CC) {
  default:
    break;

  case AArch64CC::EQ: // Z set
  case AArch64CC::NE: // Z clear
    UsedFlags.Z = true;
    break;

  case AArch64CC::HI: // Z clear and C set
  case AArch64CC::LS: // Z set or C clear
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH; // HI/LS also read C, like HS/LO below.
  case AArch64CC::HS: // C set
  case AArch64CC::LO: // C clear
    UsedFlags.C = true;
    break;

  case AArch64CC::MI: // N set
  case AArch64CC::PL: // N clear
    UsedFlags.N = true;
    break;

  case AArch64CC::VS: // V set
  case AArch64CC::VC: // V clear
    UsedFlags.V = true;
    break;

  case AArch64CC::GT: // Z clear, N and V the same
  case AArch64CC::LE: // Z set, N and V differ
    UsedFlags.Z = true;
    LLVM_FALLTHROUGH; // GT/LE also read N and V, like GE/LT below.
  case AArch64CC::GE: // N and V the same
  case AArch64CC::LT: // N and V differ
    UsedFlags.N = true;
    UsedFlags.V = true;
    break;
  }
  return UsedFlags;
}
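
// Example (illustrative): for "b.hi", getUsedNZCV(AArch64CC::HI) reports both
// Z and C as used (HI means "C set and Z clear"), so a substitution that
// cannot faithfully reproduce the C flag must be rejected by the caller.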

static bool isADDSRegImm(unsigned Opcode) {
  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}

static bool isSUBSRegImm(unsigned Opcode) {
  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}

/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
///        MI and CmpInstr,
///        or if MI opcode is not the S form there must be neither defs of
///        flags nor uses of flags between MI and CmpInstr
/// - and, C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
                                       const TargetRegisterInfo *TRI) {
  assert(MI);
  assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
  assert(CmpInstr);

  const unsigned CmpOpcode = CmpInstr->getOpcode();
  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
    return false;

  if (MI->getParent() != CmpInstr->getParent())
    return false;

  if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
    return false;

  AccessKind AccessToCheck = AK_Write;
  if (sForm(*MI) != MI->getOpcode())
    AccessToCheck = AK_All;
  if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
    return false;

  UsedNZCV NZCVUsedAfterCmp;
  for (auto I = std::next(CmpInstr->getIterator()),
            E = CmpInstr->getParent()->instr_end();
       I != E; ++I) {
    const MachineInstr &Instr = *I;
    if (Instr.readsRegister(AArch64::NZCV, TRI)) {
      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
        return false;
      NZCVUsedAfterCmp |= getUsedNZCV(CC);
    }

    if (Instr.modifiesRegister(AArch64::NZCV, TRI))
      break;
  }

  return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
}

/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
    MachineInstr &CmpInstr, unsigned SrcReg,
    const MachineRegisterInfo *MRI) const {
  assert(MRI);
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();

  unsigned NewOpc = sForm(*MI);
  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
    return false;

  if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
    return false;

  // Update the instruction to set NZCV.
  MI->setDesc(get(NewOpc));
  CmpInstr.eraseFromParent();
  bool succeeded = UpdateOperandRegClass(*MI);
  (void)succeeded;
  assert(succeeded && "Some operands reg class are incompatible!");
  MI->addRegisterDefined(AArch64::NZCV, TRI);
  return true;
}
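
// Illustrative before/after for the substitution above (a sketch):
//   sub  w1, w0, w2        // becomes "subs w1, w0, w2" (the S form)
//   cmp  w1, #0            // erased; NZCV now comes from the subs
//   b.ne target            // still correct: only the Z flag is consumed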

bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
    return false;

  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg();
  const GlobalValue *GV =
      cast<GlobalValue>((*MI.memoperands_begin())->getValue());
  const TargetMachine &TM = MBB.getParent()->getTarget();
  unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
  const unsigned char MO_NC = AArch64II::MO_NC;

  if ((OpFlags & AArch64II::MO_GOT) != 0) {
    BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else if (TM.getCodeModel() == CodeModel::Large) {
    BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
        .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
    BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(0)
        .addMemOperand(*MI.memoperands_begin());
  } else {
    BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
        .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
    unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
    BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
        .addReg(Reg, RegState::Kill)
        .addGlobalAddress(GV, 0, LoFlags)
        .addMemOperand(*MI.memoperands_begin());
  }

  MBB.erase(MI);

  return true;
}
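
// Illustrative expansion of LOAD_STACK_GUARD for the default (small) code
// model, assuming the guard symbol is __stack_chk_guard:
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]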
1300
Tim Northover3b0846e2014-05-24 12:50:23 +00001301/// Return true if this is this instruction has a non-zero immediate
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001302bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
1303 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001304 default:
1305 break;
1306 case AArch64::ADDSWrs:
1307 case AArch64::ADDSXrs:
1308 case AArch64::ADDWrs:
1309 case AArch64::ADDXrs:
1310 case AArch64::ANDSWrs:
1311 case AArch64::ANDSXrs:
1312 case AArch64::ANDWrs:
1313 case AArch64::ANDXrs:
1314 case AArch64::BICSWrs:
1315 case AArch64::BICSXrs:
1316 case AArch64::BICWrs:
1317 case AArch64::BICXrs:
1318 case AArch64::CRC32Brr:
1319 case AArch64::CRC32CBrr:
1320 case AArch64::CRC32CHrr:
1321 case AArch64::CRC32CWrr:
1322 case AArch64::CRC32CXrr:
1323 case AArch64::CRC32Hrr:
1324 case AArch64::CRC32Wrr:
1325 case AArch64::CRC32Xrr:
1326 case AArch64::EONWrs:
1327 case AArch64::EONXrs:
1328 case AArch64::EORWrs:
1329 case AArch64::EORXrs:
1330 case AArch64::ORNWrs:
1331 case AArch64::ORNXrs:
1332 case AArch64::ORRWrs:
1333 case AArch64::ORRXrs:
1334 case AArch64::SUBSWrs:
1335 case AArch64::SUBSXrs:
1336 case AArch64::SUBWrs:
1337 case AArch64::SUBXrs:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001338 if (MI.getOperand(3).isImm()) {
1339 unsigned val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001340 return (val != 0);
1341 }
1342 break;
1343 }
1344 return false;
1345}
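// Illustrative example (not part of the upstream file): "add w0, w1, w2,
// lsl #2" (ADDWrs) carries the shift in its operand-3 immediate and returns
// true here, while the plain "add w0, w1, w2" form encodes LSL #0 as an
// immediate of 0 and returns false.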
1346
 1347/// Return true if this instruction has an extended register operand with a non-zero immediate
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001348bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) const {
1349 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001350 default:
1351 break;
1352 case AArch64::ADDSWrx:
1353 case AArch64::ADDSXrx:
1354 case AArch64::ADDSXrx64:
1355 case AArch64::ADDWrx:
1356 case AArch64::ADDXrx:
1357 case AArch64::ADDXrx64:
1358 case AArch64::SUBSWrx:
1359 case AArch64::SUBSXrx:
1360 case AArch64::SUBSXrx64:
1361 case AArch64::SUBWrx:
1362 case AArch64::SUBXrx:
1363 case AArch64::SUBXrx64:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001364 if (MI.getOperand(3).isImm()) {
1365 unsigned val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001366 return (val != 0);
1367 }
1368 break;
1369 }
1370
1371 return false;
1372}
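// Illustrative example (assumed encoding): "add x0, x1, w2, sxtw #1"
// (ADDXrx) carries a non-zero arith-extend immediate in operand 3 and
// returns true; only the all-zero extend encoding yields false here.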
1373
1374// Return true if this instruction simply sets its single destination register
1375// to zero. This is equivalent to a register rename of the zero-register.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001376bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) const {
1377 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001378 default:
1379 break;
1380 case AArch64::MOVZWi:
1381 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001382 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1383 assert(MI.getDesc().getNumOperands() == 3 &&
1384 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001385 return true;
1386 }
1387 break;
1388 case AArch64::ANDWri: // and Rd, Rzr, #imm
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001389 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001390 case AArch64::ANDXri:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001391 return MI.getOperand(1).getReg() == AArch64::XZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001392 case TargetOpcode::COPY:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001393 return MI.getOperand(1).getReg() == AArch64::WZR;
Tim Northover3b0846e2014-05-24 12:50:23 +00001394 }
1395 return false;
1396}
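// E.g. (illustrative): "movz w0, #0" (MOVZWi with LSL #0) and
// "and x0, xzr, #imm" (ANDXri reading XZR) both materialize zero, so either
// can be treated as a rename of the zero register.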
1397
1398// Return true if this instruction simply renames a general register without
1399// modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001400bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) const {
1401 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001402 default:
1403 break;
1404 case TargetOpcode::COPY: {
 1405    // GPR32 copies will be lowered to ORRXrs
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001406 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001407 return (AArch64::GPR32RegClass.contains(DstReg) ||
1408 AArch64::GPR64RegClass.contains(DstReg));
1409 }
1410 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001411 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1412 assert(MI.getDesc().getNumOperands() == 4 &&
1413 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001414 return true;
1415 }
Renato Golin541d7e72014-08-01 17:27:31 +00001416 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001417 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001418 if (MI.getOperand(2).getImm() == 0) {
1419 assert(MI.getDesc().getNumOperands() == 4 &&
1420 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
Tim Northover3b0846e2014-05-24 12:50:23 +00001421 return true;
1422 }
Renato Golin541d7e72014-08-01 17:27:31 +00001423 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001424 }
1425 return false;
1426}
1427
 1428// Return true if this instruction simply renames a floating-point register
 1429// without modifying bits.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001430bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) const {
1431 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001432 default:
1433 break;
1434 case TargetOpcode::COPY: {
 1435    // FPR64 copies will be lowered to ORR.16b
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001436 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001437 return (AArch64::FPR64RegClass.contains(DstReg) ||
1438 AArch64::FPR128RegClass.contains(DstReg));
1439 }
1440 case AArch64::ORRv16i8:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001441 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1442 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001443 "invalid ORRv16i8 operands");
1444 return true;
1445 }
Renato Golin541d7e72014-08-01 17:27:31 +00001446 break;
Tim Northover3b0846e2014-05-24 12:50:23 +00001447 }
1448 return false;
1449}
1450
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001451unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001452 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001453 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001454 default:
1455 break;
1456 case AArch64::LDRWui:
1457 case AArch64::LDRXui:
1458 case AArch64::LDRBui:
1459 case AArch64::LDRHui:
1460 case AArch64::LDRSui:
1461 case AArch64::LDRDui:
1462 case AArch64::LDRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001463 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1464 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1465 FrameIndex = MI.getOperand(1).getIndex();
1466 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001467 }
1468 break;
1469 }
1470
1471 return 0;
1472}
1473
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001474unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001475 int &FrameIndex) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001476 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001477 default:
1478 break;
1479 case AArch64::STRWui:
1480 case AArch64::STRXui:
1481 case AArch64::STRBui:
1482 case AArch64::STRHui:
1483 case AArch64::STRSui:
1484 case AArch64::STRDui:
1485 case AArch64::STRQui:
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001486 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1487 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1488 FrameIndex = MI.getOperand(1).getIndex();
1489 return MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001490 }
1491 break;
1492 }
1493 return 0;
1494}
1495
 1496/// Return true if this load/store scales or extends its register offset.
1497/// This refers to scaling a dynamic index as opposed to scaled immediates.
1498/// MI should be a memory op that allows scaled addressing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001499bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
1500 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001501 default:
1502 break;
1503 case AArch64::LDRBBroW:
1504 case AArch64::LDRBroW:
1505 case AArch64::LDRDroW:
1506 case AArch64::LDRHHroW:
1507 case AArch64::LDRHroW:
1508 case AArch64::LDRQroW:
1509 case AArch64::LDRSBWroW:
1510 case AArch64::LDRSBXroW:
1511 case AArch64::LDRSHWroW:
1512 case AArch64::LDRSHXroW:
1513 case AArch64::LDRSWroW:
1514 case AArch64::LDRSroW:
1515 case AArch64::LDRWroW:
1516 case AArch64::LDRXroW:
1517 case AArch64::STRBBroW:
1518 case AArch64::STRBroW:
1519 case AArch64::STRDroW:
1520 case AArch64::STRHHroW:
1521 case AArch64::STRHroW:
1522 case AArch64::STRQroW:
1523 case AArch64::STRSroW:
1524 case AArch64::STRWroW:
1525 case AArch64::STRXroW:
1526 case AArch64::LDRBBroX:
1527 case AArch64::LDRBroX:
1528 case AArch64::LDRDroX:
1529 case AArch64::LDRHHroX:
1530 case AArch64::LDRHroX:
1531 case AArch64::LDRQroX:
1532 case AArch64::LDRSBWroX:
1533 case AArch64::LDRSBXroX:
1534 case AArch64::LDRSHWroX:
1535 case AArch64::LDRSHXroX:
1536 case AArch64::LDRSWroX:
1537 case AArch64::LDRSroX:
1538 case AArch64::LDRWroX:
1539 case AArch64::LDRXroX:
1540 case AArch64::STRBBroX:
1541 case AArch64::STRBroX:
1542 case AArch64::STRDroX:
1543 case AArch64::STRHHroX:
1544 case AArch64::STRHroX:
1545 case AArch64::STRQroX:
1546 case AArch64::STRSroX:
1547 case AArch64::STRWroX:
1548 case AArch64::STRXroX:
1549
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001550 unsigned Val = MI.getOperand(3).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001551 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1552 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1553 }
1554 return false;
1555}
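// Worked example (illustrative): "ldr x0, [x1, w2, sxtw #3]" (LDRXroW) uses
// an SXTW extend, so this returns true; "ldr x0, [x1, x2]" (LDRXroX with
// UXTX and no shift) is a plain dynamic index and returns false.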
1556
1557/// Check all MachineMemOperands for a hint to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001558bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
Justin Lebar288b3372016-07-14 18:15:20 +00001559 return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1560 return MMO->getFlags() & MOSuppressPair;
1561 });
Tim Northover3b0846e2014-05-24 12:50:23 +00001562}
1563
1564/// Set a flag on the first MachineMemOperand to suppress pairing.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001565void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const {
1566 if (MI.memoperands_empty())
Tim Northover3b0846e2014-05-24 12:50:23 +00001567 return;
Justin Lebar288b3372016-07-14 18:15:20 +00001568 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
Tim Northover3b0846e2014-05-24 12:50:23 +00001569}
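// A minimal usage sketch (assumed call site; in-tree the flag is set by the
// AArch64StorePairSuppress pass):
//   TII->suppressLdStPair(MI);             // tags the first memoperand
//   assert(TII->isLdStPairSuppressed(MI)); // any_of over memoperands sees it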
1570
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001571bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const {
1572 switch (Opc) {
1573 default:
1574 return false;
1575 case AArch64::STURSi:
1576 case AArch64::STURDi:
1577 case AArch64::STURQi:
1578 case AArch64::STURBBi:
1579 case AArch64::STURHHi:
1580 case AArch64::STURWi:
1581 case AArch64::STURXi:
1582 case AArch64::LDURSi:
1583 case AArch64::LDURDi:
1584 case AArch64::LDURQi:
1585 case AArch64::LDURWi:
1586 case AArch64::LDURXi:
1587 case AArch64::LDURSWi:
1588 case AArch64::LDURHHi:
1589 case AArch64::LDURBBi:
1590 case AArch64::LDURSBWi:
1591 case AArch64::LDURSHWi:
1592 return true;
1593 }
1594}
1595
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001596bool AArch64InstrInfo::isUnscaledLdSt(MachineInstr &MI) const {
1597 return isUnscaledLdSt(MI.getOpcode());
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001598}
1599
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001600// Is this a candidate for ld/st merging or pairing? For example, we don't
1601// touch volatiles or load/stores that have a hint to avoid pair formation.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001602bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001603 // If this is a volatile load/store, don't mess with it.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001604 if (MI.hasOrderedMemoryRef())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001605 return false;
1606
1607 // Make sure this is a reg+imm (as opposed to an address reloc).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001608 assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
1609 if (!MI.getOperand(2).isImm())
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001610 return false;
1611
1612 // Can't merge/pair if the instruction modifies the base register.
1613 // e.g., ldr x0, [x0]
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001614 unsigned BaseReg = MI.getOperand(1).getReg();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001615 const TargetRegisterInfo *TRI = &getRegisterInfo();
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001616 if (MI.modifiesRegister(BaseReg, TRI))
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001617 return false;
1618
1619 // Check if this load/store has a hint to avoid pair formation.
1620 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1621 if (isLdStPairSuppressed(MI))
1622 return false;
1623
Matthias Braun651cff42016-06-02 18:03:53 +00001624 // On some CPUs quad load/store pairs are slower than two single load/stores.
1625 if (Subtarget.avoidQuadLdStPairs()) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001626 switch (MI.getOpcode()) {
Matthias Braunbcfd2362016-05-28 01:06:51 +00001627 default:
1628 break;
Evandro Menezes8d53f882016-04-13 18:31:45 +00001629
Matthias Braunbcfd2362016-05-28 01:06:51 +00001630 case AArch64::LDURQi:
1631 case AArch64::STURQi:
1632 case AArch64::LDRQui:
1633 case AArch64::STRQui:
1634 return false;
Evandro Menezes8d53f882016-04-13 18:31:45 +00001635 }
Matthias Braunbcfd2362016-05-28 01:06:51 +00001636 }
Evandro Menezes8d53f882016-04-13 18:31:45 +00001637
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001638 return true;
1639}
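// For instance (illustrative), "ldr x0, [x0, #8]" modifies its own base
// register, and a volatile access has an ordered memory reference; both are
// rejected here before any pairing is attempted.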
1640
Chad Rosierc27a18f2016-03-09 16:00:35 +00001641bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001642 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
Chad Rosierc27a18f2016-03-09 16:00:35 +00001643 const TargetRegisterInfo *TRI) const {
Geoff Berry22dfbc52016-08-12 15:26:00 +00001644 unsigned Width;
1645 return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001646}
1647
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001648bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001649 MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
Chad Rosier3528c1e2014-09-08 14:43:48 +00001650 const TargetRegisterInfo *TRI) const {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001651 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
Chad Rosier3528c1e2014-09-08 14:43:48 +00001652 // Handle only loads/stores with base register followed by immediate offset.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001653 if (LdSt.getNumExplicitOperands() == 3) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001654 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001655 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001656 return false;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001657 } else if (LdSt.getNumExplicitOperands() == 4) {
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001658 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001659 if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
1660 !LdSt.getOperand(3).isImm())
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001661 return false;
1662 } else
Chad Rosier3528c1e2014-09-08 14:43:48 +00001663 return false;
1664
 1665  // Offset is calculated as the immediate operand multiplied by the
 1666  // scaling factor. Unscaled instructions have scaling factor set to 1.
Chad Rosier0da267d2016-03-09 16:46:48 +00001667 unsigned Scale = 0;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001668 switch (LdSt.getOpcode()) {
Chad Rosier3528c1e2014-09-08 14:43:48 +00001669 default:
1670 return false;
1671 case AArch64::LDURQi:
1672 case AArch64::STURQi:
1673 Width = 16;
1674 Scale = 1;
1675 break;
1676 case AArch64::LDURXi:
1677 case AArch64::LDURDi:
1678 case AArch64::STURXi:
1679 case AArch64::STURDi:
1680 Width = 8;
1681 Scale = 1;
1682 break;
1683 case AArch64::LDURWi:
1684 case AArch64::LDURSi:
1685 case AArch64::LDURSWi:
1686 case AArch64::STURWi:
1687 case AArch64::STURSi:
1688 Width = 4;
1689 Scale = 1;
1690 break;
1691 case AArch64::LDURHi:
1692 case AArch64::LDURHHi:
1693 case AArch64::LDURSHXi:
1694 case AArch64::LDURSHWi:
1695 case AArch64::STURHi:
1696 case AArch64::STURHHi:
1697 Width = 2;
1698 Scale = 1;
1699 break;
1700 case AArch64::LDURBi:
1701 case AArch64::LDURBBi:
1702 case AArch64::LDURSBXi:
1703 case AArch64::LDURSBWi:
1704 case AArch64::STURBi:
1705 case AArch64::STURBBi:
1706 Width = 1;
1707 Scale = 1;
1708 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001709 case AArch64::LDPQi:
1710 case AArch64::LDNPQi:
1711 case AArch64::STPQi:
1712 case AArch64::STNPQi:
1713 Scale = 16;
1714 Width = 32;
1715 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001716 case AArch64::LDRQui:
1717 case AArch64::STRQui:
1718 Scale = Width = 16;
1719 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001720 case AArch64::LDPXi:
1721 case AArch64::LDPDi:
1722 case AArch64::LDNPXi:
1723 case AArch64::LDNPDi:
1724 case AArch64::STPXi:
1725 case AArch64::STPDi:
1726 case AArch64::STNPXi:
1727 case AArch64::STNPDi:
1728 Scale = 8;
1729 Width = 16;
1730 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001731 case AArch64::LDRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001732 case AArch64::LDRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001733 case AArch64::STRXui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001734 case AArch64::STRDui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001735 Scale = Width = 8;
1736 break;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001737 case AArch64::LDPWi:
1738 case AArch64::LDPSi:
1739 case AArch64::LDNPWi:
1740 case AArch64::LDNPSi:
1741 case AArch64::STPWi:
1742 case AArch64::STPSi:
1743 case AArch64::STNPWi:
1744 case AArch64::STNPSi:
1745 Scale = 4;
1746 Width = 8;
1747 break;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001748 case AArch64::LDRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001749 case AArch64::LDRSui:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001750 case AArch64::LDRSWui:
Chad Rosier84a0afd2015-09-18 14:13:18 +00001751 case AArch64::STRWui:
Chad Rosier3528c1e2014-09-08 14:43:48 +00001752 case AArch64::STRSui:
1753 Scale = Width = 4;
1754 break;
Chad Rosier84a0afd2015-09-18 14:13:18 +00001755 case AArch64::LDRHui:
1756 case AArch64::LDRHHui:
1757 case AArch64::STRHui:
1758 case AArch64::STRHHui:
1759 Scale = Width = 2;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001760 break;
Chad Rosierd90e2eb2015-09-18 14:15:19 +00001761 case AArch64::LDRBui:
1762 case AArch64::LDRBBui:
1763 case AArch64::STRBui:
1764 case AArch64::STRBBui:
1765 Scale = Width = 1;
Chad Rosier3528c1e2014-09-08 14:43:48 +00001766 break;
Chad Rosier064261d2016-02-01 20:54:36 +00001767 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001768
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001769 if (LdSt.getNumExplicitOperands() == 3) {
1770 BaseReg = LdSt.getOperand(1).getReg();
1771 Offset = LdSt.getOperand(2).getImm() * Scale;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001772 } else {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001773 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1774 BaseReg = LdSt.getOperand(2).getReg();
1775 Offset = LdSt.getOperand(3).getImm() * Scale;
Chad Rosier1fbe9bc2016-04-15 18:09:10 +00001776 }
Chad Rosier3528c1e2014-09-08 14:43:48 +00001777 return true;
1778}
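// Worked example (illustrative): for "ldp x1, x2, [x0, #16]" (LDPXi) the
// explicit operands are (dst1, dst2, base, imm), so BaseReg is x0, the MI
// immediate is 2, and Offset = 2 * Scale(8) = 16 bytes, with Width = 16
// covering both registers.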
1779
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001780// Scale the unscaled offsets. Returns false if the unscaled offset can't be
1781// scaled.
1782static bool scaleOffset(unsigned Opc, int64_t &Offset) {
1783 unsigned OffsetStride = 1;
1784 switch (Opc) {
1785 default:
1786 return false;
1787 case AArch64::LDURQi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001788 case AArch64::STURQi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001789 OffsetStride = 16;
1790 break;
1791 case AArch64::LDURXi:
1792 case AArch64::LDURDi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001793 case AArch64::STURXi:
1794 case AArch64::STURDi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001795 OffsetStride = 8;
1796 break;
1797 case AArch64::LDURWi:
1798 case AArch64::LDURSi:
1799 case AArch64::LDURSWi:
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001800 case AArch64::STURWi:
1801 case AArch64::STURSi:
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001802 OffsetStride = 4;
1803 break;
1804 }
1805 // If the byte-offset isn't a multiple of the stride, we can't scale this
1806 // offset.
1807 if (Offset % OffsetStride != 0)
1808 return false;
1809
1810 // Convert the byte-offset used by unscaled into an "element" offset used
1811 // by the scaled pair load/store instructions.
1812 Offset /= OffsetStride;
1813 return true;
1814}
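// Worked example (illustrative): for LDURXi with a byte offset of 24,
// OffsetStride is 8, 24 % 8 == 0, and Offset becomes 24 / 8 = 3 -- the
// element offset a paired LDPXi would encode. A byte offset of 12 fails the
// stride check and cannot be scaled.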
1815
1816static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
1817 if (FirstOpc == SecondOpc)
1818 return true;
1819 // We can also pair sign-ext and zero-ext instructions.
1820 switch (FirstOpc) {
1821 default:
1822 return false;
1823 case AArch64::LDRWui:
1824 case AArch64::LDURWi:
1825 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
1826 case AArch64::LDRSWui:
1827 case AArch64::LDURSWi:
1828 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
1829 }
1830 // These instructions can't be paired based on their opcodes.
1831 return false;
1832}
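// For example (illustrative), an LDRWui followed by an LDRSWui passes this
// check even though the opcodes differ, since zero- and sign-extending
// 32-bit loads may still be paired; all other mixed-opcode combinations are
// rejected.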
1833
Tim Northover3b0846e2014-05-24 12:50:23 +00001834/// Detect opportunities for ldp/stp formation.
1835///
Sanjoy Dasb666ea32015-06-15 18:44:14 +00001836/// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001837bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
1838 MachineInstr &SecondLdSt,
Jun Bum Lim4c5bd582016-04-15 14:58:38 +00001839 unsigned NumLoads) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001840 // Only cluster up to a single pair.
1841 if (NumLoads > 1)
1842 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001843
Geoff Berry22dfbc52016-08-12 15:26:00 +00001844 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
1845 return false;
1846
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001847 // Can we pair these instructions based on their opcodes?
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001848 unsigned FirstOpc = FirstLdSt.getOpcode();
1849 unsigned SecondOpc = SecondLdSt.getOpcode();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001850 if (!canPairLdStOpc(FirstOpc, SecondOpc))
Tim Northover3b0846e2014-05-24 12:50:23 +00001851 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001852
1853 // Can't merge volatiles or load/stores that have a hint to avoid pair
1854 // formation, for example.
1855 if (!isCandidateToMergeOrPair(FirstLdSt) ||
1856 !isCandidateToMergeOrPair(SecondLdSt))
Tim Northover3b0846e2014-05-24 12:50:23 +00001857 return false;
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001858
1859 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001860 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001861 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
1862 return false;
1863
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001864 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001865 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
1866 return false;
1867
1868 // Pairwise instructions have a 7-bit signed offset field.
1869 if (Offset1 > 63 || Offset1 < -64)
1870 return false;
1871
Tim Northover3b0846e2014-05-24 12:50:23 +00001872 // The caller should already have ordered First/SecondLdSt by offset.
Chad Rosiercdfd7e72016-03-18 19:21:02 +00001873 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
1874 return Offset1 + 1 == Offset2;
Tim Northover3b0846e2014-05-24 12:50:23 +00001875}
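// Putting it together (an assumed example): for "ldr x1, [x0, #8]" and
// "ldr x2, [x0, #16]" (both LDRXui, whose MI immediates are already the
// element offsets 1 and 2), offset 1 fits the 7-bit signed field and
// 1 + 1 == 2, so the two loads are clustered as an ldp candidate.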
1876
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001877bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
1878 MachineInstr &Second) const {
Matthias Braun651cff42016-06-02 18:03:53 +00001879 if (Subtarget.hasMacroOpFusion()) {
1880 // Fuse CMN, CMP, TST followed by Bcc.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001881 unsigned SecondOpcode = Second.getOpcode();
Matthias Braunc8b67e62015-07-20 23:11:42 +00001882 if (SecondOpcode == AArch64::Bcc) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001883 switch (First.getOpcode()) {
Matthias Braunc8b67e62015-07-20 23:11:42 +00001884 default:
1885 return false;
1886 case AArch64::SUBSWri:
1887 case AArch64::ADDSWri:
1888 case AArch64::ANDSWri:
1889 case AArch64::SUBSXri:
1890 case AArch64::ADDSXri:
1891 case AArch64::ANDSXri:
1892 return true;
1893 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001894 }
Matthias Braun651cff42016-06-02 18:03:53 +00001895 // Fuse ALU operations followed by CBZ/CBNZ.
Matthias Braunc8b67e62015-07-20 23:11:42 +00001896 if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
1897 SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001898 switch (First.getOpcode()) {
Matthias Braunc8b67e62015-07-20 23:11:42 +00001899 default:
1900 return false;
1901 case AArch64::ADDWri:
1902 case AArch64::ADDXri:
1903 case AArch64::ANDWri:
1904 case AArch64::ANDXri:
1905 case AArch64::EORWri:
1906 case AArch64::EORXri:
1907 case AArch64::ORRWri:
1908 case AArch64::ORRXri:
1909 case AArch64::SUBWri:
1910 case AArch64::SUBXri:
1911 return true;
1912 }
Matthias Braune536f4f2015-07-20 22:34:47 +00001913 }
1914 }
1915 return false;
Tim Northover3b0846e2014-05-24 12:50:23 +00001916}
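// Example of the first pattern (illustrative): on a subtarget reporting
// macro-op fusion, "subs w0, w1, #4" (SUBSWri) followed by "b.ne .Lbb"
// (Bcc) is reported as fusable, so the scheduler keeps the flag-setting
// instruction and the branch adjacent.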
1917
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001918MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
1919 MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001920 const MDNode *Expr, const DebugLoc &DL) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001921 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
1922 .addFrameIndex(FrameIx)
1923 .addImm(0)
1924 .addImm(Offset)
Adrian Prantl87b7eb92014-10-01 18:55:02 +00001925 .addMetadata(Var)
1926 .addMetadata(Expr);
Tim Northover3b0846e2014-05-24 12:50:23 +00001927 return &*MIB;
1928}
1929
1930static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
1931 unsigned Reg, unsigned SubIdx,
1932 unsigned State,
1933 const TargetRegisterInfo *TRI) {
1934 if (!SubIdx)
1935 return MIB.addReg(Reg, State);
1936
1937 if (TargetRegisterInfo::isPhysicalRegister(Reg))
1938 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
1939 return MIB.addReg(Reg, State, SubIdx);
1940}
1941
1942static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
1943 unsigned NumRegs) {
 1944  // We really want the positive remainder mod 32 here; that happens to be
 1945  // easily obtainable with a mask.
1946 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
1947}
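// Worked example (illustrative): copying a 3-register tuple from Q30 to Q31
// gives (31 - 30) & 0x1f == 1 < 3, so a forward sub-register copy would
// clobber source register Q31 before it is read; copyPhysRegTuple below
// therefore iterates over the sub-registers backwards in that case.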
1948
1949void AArch64InstrInfo::copyPhysRegTuple(
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001950 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00001951 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
1952 llvm::ArrayRef<unsigned> Indices) const {
Eric Christopher58f32662014-06-10 22:57:21 +00001953 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00001954 "Unexpected register copy without NEON");
Eric Christophera0de2532015-03-18 20:37:30 +00001955 const TargetRegisterInfo *TRI = &getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001956 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
1957 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
1958 unsigned NumRegs = Indices.size();
1959
1960 int SubReg = 0, End = NumRegs, Incr = 1;
1961 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
1962 SubReg = NumRegs - 1;
1963 End = -1;
1964 Incr = -1;
1965 }
1966
1967 for (; SubReg != End; SubReg += Incr) {
James Molloyf8aa57a2015-04-16 11:37:40 +00001968 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
Tim Northover3b0846e2014-05-24 12:50:23 +00001969 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
1970 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
1971 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
1972 }
1973}
1974
1975void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001976 MachineBasicBlock::iterator I,
1977 const DebugLoc &DL, unsigned DestReg,
1978 unsigned SrcReg, bool KillSrc) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00001979 if (AArch64::GPR32spRegClass.contains(DestReg) &&
1980 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
Eric Christophera0de2532015-03-18 20:37:30 +00001981 const TargetRegisterInfo *TRI = &getRegisterInfo();
1982
Tim Northover3b0846e2014-05-24 12:50:23 +00001983 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
1984 // If either operand is WSP, expand to ADD #0.
1985 if (Subtarget.hasZeroCycleRegMove()) {
1986 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
1987 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
1988 &AArch64::GPR64spRegClass);
1989 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
1990 &AArch64::GPR64spRegClass);
1991 // This instruction is reading and writing X registers. This may upset
1992 // the register scavenger and machine verifier, so we need to indicate
1993 // that we are reading an undefined value from SrcRegX, but a proper
1994 // value from SrcReg.
1995 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
1996 .addReg(SrcRegX, RegState::Undef)
1997 .addImm(0)
1998 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1999 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2000 } else {
2001 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2002 .addReg(SrcReg, getKillRegState(KillSrc))
2003 .addImm(0)
2004 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2005 }
2006 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2007 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
2008 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2009 } else {
2010 if (Subtarget.hasZeroCycleRegMove()) {
2011 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2012 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2013 &AArch64::GPR64spRegClass);
2014 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2015 &AArch64::GPR64spRegClass);
2016 // This instruction is reading and writing X registers. This may upset
2017 // the register scavenger and machine verifier, so we need to indicate
2018 // that we are reading an undefined value from SrcRegX, but a proper
2019 // value from SrcReg.
2020 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2021 .addReg(AArch64::XZR)
2022 .addReg(SrcRegX, RegState::Undef)
2023 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2024 } else {
2025 // Otherwise, expand to ORR WZR.
2026 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2027 .addReg(AArch64::WZR)
2028 .addReg(SrcReg, getKillRegState(KillSrc));
2029 }
2030 }
2031 return;
2032 }
2033
2034 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2035 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2036 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2037 // If either operand is SP, expand to ADD #0.
2038 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2039 .addReg(SrcReg, getKillRegState(KillSrc))
2040 .addImm(0)
2041 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2042 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2043 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
2044 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2045 } else {
2046 // Otherwise, expand to ORR XZR.
2047 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2048 .addReg(AArch64::XZR)
2049 .addReg(SrcReg, getKillRegState(KillSrc));
2050 }
2051 return;
2052 }
2053
2054 // Copy a DDDD register quad by copying the individual sub-registers.
2055 if (AArch64::DDDDRegClass.contains(DestReg) &&
2056 AArch64::DDDDRegClass.contains(SrcReg)) {
2057 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2058 AArch64::dsub2, AArch64::dsub3 };
2059 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2060 Indices);
2061 return;
2062 }
2063
2064 // Copy a DDD register triple by copying the individual sub-registers.
2065 if (AArch64::DDDRegClass.contains(DestReg) &&
2066 AArch64::DDDRegClass.contains(SrcReg)) {
2067 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
2068 AArch64::dsub2 };
2069 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2070 Indices);
2071 return;
2072 }
2073
2074 // Copy a DD register pair by copying the individual sub-registers.
2075 if (AArch64::DDRegClass.contains(DestReg) &&
2076 AArch64::DDRegClass.contains(SrcReg)) {
2077 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
2078 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2079 Indices);
2080 return;
2081 }
2082
2083 // Copy a QQQQ register quad by copying the individual sub-registers.
2084 if (AArch64::QQQQRegClass.contains(DestReg) &&
2085 AArch64::QQQQRegClass.contains(SrcReg)) {
2086 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2087 AArch64::qsub2, AArch64::qsub3 };
2088 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2089 Indices);
2090 return;
2091 }
2092
2093 // Copy a QQQ register triple by copying the individual sub-registers.
2094 if (AArch64::QQQRegClass.contains(DestReg) &&
2095 AArch64::QQQRegClass.contains(SrcReg)) {
2096 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
2097 AArch64::qsub2 };
2098 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2099 Indices);
2100 return;
2101 }
2102
2103 // Copy a QQ register pair by copying the individual sub-registers.
2104 if (AArch64::QQRegClass.contains(DestReg) &&
2105 AArch64::QQRegClass.contains(SrcReg)) {
2106 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
2107 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2108 Indices);
2109 return;
2110 }
2111
2112 if (AArch64::FPR128RegClass.contains(DestReg) &&
2113 AArch64::FPR128RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002114    if (Subtarget.hasNEON()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00002115 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2116 .addReg(SrcReg)
2117 .addReg(SrcReg, getKillRegState(KillSrc));
2118 } else {
2119 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2120 .addReg(AArch64::SP, RegState::Define)
2121 .addReg(SrcReg, getKillRegState(KillSrc))
2122 .addReg(AArch64::SP)
2123 .addImm(-16);
2124 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2125 .addReg(AArch64::SP, RegState::Define)
2126 .addReg(DestReg, RegState::Define)
2127 .addReg(AArch64::SP)
2128 .addImm(16);
2129 }
2130 return;
2131 }
2132
2133 if (AArch64::FPR64RegClass.contains(DestReg) &&
2134 AArch64::FPR64RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002135 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002136 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2137 &AArch64::FPR128RegClass);
2138 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2139 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002140 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2141 .addReg(SrcReg)
2142 .addReg(SrcReg, getKillRegState(KillSrc));
2143 } else {
2144 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2145 .addReg(SrcReg, getKillRegState(KillSrc));
2146 }
2147 return;
2148 }
2149
2150 if (AArch64::FPR32RegClass.contains(DestReg) &&
2151 AArch64::FPR32RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002152 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002153 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2154 &AArch64::FPR128RegClass);
2155 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2156 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002157 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2158 .addReg(SrcReg)
2159 .addReg(SrcReg, getKillRegState(KillSrc));
2160 } else {
2161 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2162 .addReg(SrcReg, getKillRegState(KillSrc));
2163 }
2164 return;
2165 }
2166
2167 if (AArch64::FPR16RegClass.contains(DestReg) &&
2168 AArch64::FPR16RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002169 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002170 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2171 &AArch64::FPR128RegClass);
2172 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2173 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002174 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2175 .addReg(SrcReg)
2176 .addReg(SrcReg, getKillRegState(KillSrc));
2177 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002178 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2179 &AArch64::FPR32RegClass);
2180 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2181 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002182 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2183 .addReg(SrcReg, getKillRegState(KillSrc));
2184 }
2185 return;
2186 }
2187
2188 if (AArch64::FPR8RegClass.contains(DestReg) &&
2189 AArch64::FPR8RegClass.contains(SrcReg)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002190 if(Subtarget.hasNEON()) {
Eric Christophera0de2532015-03-18 20:37:30 +00002191 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
Tim Northover3b0846e2014-05-24 12:50:23 +00002192 &AArch64::FPR128RegClass);
Eric Christophera0de2532015-03-18 20:37:30 +00002193 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2194 &AArch64::FPR128RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002195 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2196 .addReg(SrcReg)
2197 .addReg(SrcReg, getKillRegState(KillSrc));
2198 } else {
Eric Christophera0de2532015-03-18 20:37:30 +00002199 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2200 &AArch64::FPR32RegClass);
2201 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2202 &AArch64::FPR32RegClass);
Tim Northover3b0846e2014-05-24 12:50:23 +00002203 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2204 .addReg(SrcReg, getKillRegState(KillSrc));
2205 }
2206 return;
2207 }
2208
2209 // Copies between GPR64 and FPR64.
2210 if (AArch64::FPR64RegClass.contains(DestReg) &&
2211 AArch64::GPR64RegClass.contains(SrcReg)) {
2212 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2213 .addReg(SrcReg, getKillRegState(KillSrc));
2214 return;
2215 }
2216 if (AArch64::GPR64RegClass.contains(DestReg) &&
2217 AArch64::FPR64RegClass.contains(SrcReg)) {
2218 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2219 .addReg(SrcReg, getKillRegState(KillSrc));
2220 return;
2221 }
2222 // Copies between GPR32 and FPR32.
2223 if (AArch64::FPR32RegClass.contains(DestReg) &&
2224 AArch64::GPR32RegClass.contains(SrcReg)) {
2225 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2226 .addReg(SrcReg, getKillRegState(KillSrc));
2227 return;
2228 }
2229 if (AArch64::GPR32RegClass.contains(DestReg) &&
2230 AArch64::FPR32RegClass.contains(SrcReg)) {
2231 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2232 .addReg(SrcReg, getKillRegState(KillSrc));
2233 return;
2234 }
2235
Tim Northover1bed9af2014-05-27 12:16:02 +00002236 if (DestReg == AArch64::NZCV) {
2237 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2238 BuildMI(MBB, I, DL, get(AArch64::MSR))
2239 .addImm(AArch64SysReg::NZCV)
2240 .addReg(SrcReg, getKillRegState(KillSrc))
2241 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2242 return;
2243 }
2244
2245 if (SrcReg == AArch64::NZCV) {
2246 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
Quentin Colombet658d9db2016-04-22 18:46:17 +00002247 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
Tim Northover1bed9af2014-05-27 12:16:02 +00002248 .addImm(AArch64SysReg::NZCV)
2249 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2250 return;
2251 }
2252
2253 llvm_unreachable("unimplemented reg-to-reg copy");
Tim Northover3b0846e2014-05-24 12:50:23 +00002254}
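// A minimal usage sketch (assumed call site): copyPhysReg(MBB, I, DL,
// AArch64::W0, AArch64::W1, /*KillSrc=*/true) emits "orr w0, wzr, w1" on
// cores without zero-cycle register moves, and an ORRXrr over the full X
// registers on cores where that form is recognized as a zero-cycle move.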
2255
2256void AArch64InstrInfo::storeRegToStackSlot(
2257 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2258 bool isKill, int FI, const TargetRegisterClass *RC,
2259 const TargetRegisterInfo *TRI) const {
2260 DebugLoc DL;
2261 if (MBBI != MBB.end())
2262 DL = MBBI->getDebugLoc();
2263 MachineFunction &MF = *MBB.getParent();
Matthias Braun941a7052016-07-28 18:40:00 +00002264 MachineFrameInfo &MFI = MF.getFrameInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00002265 unsigned Align = MFI.getObjectAlignment(FI);
2266
Alex Lorenze40c8a22015-08-11 23:09:45 +00002267 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002268 MachineMemOperand *MMO = MF.getMachineMemOperand(
2269 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2270 unsigned Opc = 0;
2271 bool Offset = true;
2272 switch (RC->getSize()) {
2273 case 1:
2274 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2275 Opc = AArch64::STRBui;
2276 break;
2277 case 2:
2278 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2279 Opc = AArch64::STRHui;
2280 break;
2281 case 4:
2282 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2283 Opc = AArch64::STRWui;
2284 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2285 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2286 else
2287 assert(SrcReg != AArch64::WSP);
2288 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2289 Opc = AArch64::STRSui;
2290 break;
2291 case 8:
2292 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2293 Opc = AArch64::STRXui;
2294 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2295 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2296 else
2297 assert(SrcReg != AArch64::SP);
2298 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2299 Opc = AArch64::STRDui;
2300 break;
2301 case 16:
2302 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2303 Opc = AArch64::STRQui;
2304 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002305 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002306 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002307 Opc = AArch64::ST1Twov1d;
2308 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002309 }
2310 break;
2311 case 24:
2312 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002313 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002314 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002315 Opc = AArch64::ST1Threev1d;
2316 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002317 }
2318 break;
2319 case 32:
2320 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002321 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002322 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002323 Opc = AArch64::ST1Fourv1d;
2324 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002325 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002326 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002327 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002328 Opc = AArch64::ST1Twov2d;
2329 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002330 }
2331 break;
2332 case 48:
2333 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002334 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002335 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002336 Opc = AArch64::ST1Threev2d;
2337 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002338 }
2339 break;
2340 case 64:
2341 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002342 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002343 "Unexpected register store without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002344 Opc = AArch64::ST1Fourv2d;
2345 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002346 }
2347 break;
2348 }
2349 assert(Opc && "Unknown register class");
2350
James Molloyf8aa57a2015-04-16 11:37:40 +00002351 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002352 .addReg(SrcReg, getKillRegState(isKill))
2353 .addFrameIndex(FI);
2354
2355 if (Offset)
2356 MI.addImm(0);
2357 MI.addMemOperand(MMO);
2358}
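// Illustrative example: spilling an FPR128 value selects STRQui and appends
// an immediate of 0 against the frame index, while a 32-byte QQ tuple has
// no reg+imm form, so it selects ST1Twov2d with Offset == false and no
// trailing immediate.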
2359
2360void AArch64InstrInfo::loadRegFromStackSlot(
2361 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2362 int FI, const TargetRegisterClass *RC,
2363 const TargetRegisterInfo *TRI) const {
2364 DebugLoc DL;
2365 if (MBBI != MBB.end())
2366 DL = MBBI->getDebugLoc();
2367 MachineFunction &MF = *MBB.getParent();
Matthias Braun941a7052016-07-28 18:40:00 +00002368 MachineFrameInfo &MFI = MF.getFrameInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00002369 unsigned Align = MFI.getObjectAlignment(FI);
Alex Lorenze40c8a22015-08-11 23:09:45 +00002370 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
Tim Northover3b0846e2014-05-24 12:50:23 +00002371 MachineMemOperand *MMO = MF.getMachineMemOperand(
2372 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2373
2374 unsigned Opc = 0;
2375 bool Offset = true;
2376 switch (RC->getSize()) {
2377 case 1:
2378 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2379 Opc = AArch64::LDRBui;
2380 break;
2381 case 2:
2382 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2383 Opc = AArch64::LDRHui;
2384 break;
2385 case 4:
2386 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2387 Opc = AArch64::LDRWui;
2388 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2389 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2390 else
2391 assert(DestReg != AArch64::WSP);
2392 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2393 Opc = AArch64::LDRSui;
2394 break;
2395 case 8:
2396 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2397 Opc = AArch64::LDRXui;
2398 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2399 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2400 else
2401 assert(DestReg != AArch64::SP);
2402 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2403 Opc = AArch64::LDRDui;
2404 break;
2405 case 16:
2406 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2407 Opc = AArch64::LDRQui;
2408 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002409 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002410 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002411 Opc = AArch64::LD1Twov1d;
2412 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002413 }
2414 break;
2415 case 24:
2416 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002417 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002418 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002419 Opc = AArch64::LD1Threev1d;
2420 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002421 }
2422 break;
2423 case 32:
2424 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002425 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002426 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002427 Opc = AArch64::LD1Fourv1d;
2428 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002429 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002430 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002431 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002432 Opc = AArch64::LD1Twov2d;
2433 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002434 }
2435 break;
2436 case 48:
2437 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002438 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002439 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002440 Opc = AArch64::LD1Threev2d;
2441 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002442 }
2443 break;
2444 case 64:
2445 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
Eric Christopher58f32662014-06-10 22:57:21 +00002446 assert(Subtarget.hasNEON() &&
Tim Northover3b0846e2014-05-24 12:50:23 +00002447 "Unexpected register load without NEON");
Richard Trieu7a083812016-02-18 22:09:30 +00002448 Opc = AArch64::LD1Fourv2d;
2449 Offset = false;
Tim Northover3b0846e2014-05-24 12:50:23 +00002450 }
2451 break;
2452 }
2453 assert(Opc && "Unknown register class");
2454
James Molloyf8aa57a2015-04-16 11:37:40 +00002455 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
Tim Northover3b0846e2014-05-24 12:50:23 +00002456 .addReg(DestReg, getDefRegState(true))
2457 .addFrameIndex(FI);
2458 if (Offset)
2459 MI.addImm(0);
2460 MI.addMemOperand(MMO);
2461}
2462
2463void llvm::emitFrameOffset(MachineBasicBlock &MBB,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002464 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
Tim Northover3b0846e2014-05-24 12:50:23 +00002465 unsigned DestReg, unsigned SrcReg, int Offset,
Eric Christopherbc76b972014-06-10 17:33:39 +00002466 const TargetInstrInfo *TII,
Tim Northover3b0846e2014-05-24 12:50:23 +00002467 MachineInstr::MIFlag Flag, bool SetNZCV) {
2468 if (DestReg == SrcReg && Offset == 0)
2469 return;
2470
Geoff Berrya5335642016-05-06 16:34:59 +00002471 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2472 "SP increment/decrement not 16-byte aligned");
2473
Tim Northover3b0846e2014-05-24 12:50:23 +00002474 bool isSub = Offset < 0;
2475 if (isSub)
2476 Offset = -Offset;
2477
2478 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2479 // scratch register. If DestReg is a virtual register, use it as the
2480 // scratch register; otherwise, create a new virtual register (to be
2481 // replaced by the scavenger at the end of PEI). That case can be optimized
2482 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2483 // register can be loaded with offset%8 and the add/sub can use an extending
2484 // instruction with LSL#3.
2485 // Currently the function handles any offsets but generates a poor sequence
2486 // of code.
2487 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2488
2489 unsigned Opc;
2490 if (SetNZCV)
2491 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2492 else
2493 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2494 const unsigned MaxEncoding = 0xfff;
2495 const unsigned ShiftSize = 12;
2496 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2497 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2498 unsigned ThisVal;
2499 if (((unsigned)Offset) > MaxEncodableValue) {
2500 ThisVal = MaxEncodableValue;
2501 } else {
2502 ThisVal = Offset & MaxEncodableValue;
2503 }
2504 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2505 "Encoding cannot handle value that big");
2506 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2507 .addReg(SrcReg)
2508 .addImm(ThisVal >> ShiftSize)
2509 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2510 .setMIFlag(Flag);
2511
2512 SrcReg = DestReg;
2513 Offset -= ThisVal;
2514 if (Offset == 0)
2515 return;
2516 }
2517 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2518 .addReg(SrcReg)
2519 .addImm(Offset)
2520 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2521 .setMIFlag(Flag);
2522}
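// Worked example (illustrative): an Offset of 0x101008 does not fit in 12
// bits, so the loop first emits "add Dest, Src, #0x101, lsl #12" for the
// 0x101000 chunk, and the tail emits "add Dest, Dest, #8" for the remaining
// 8 bytes; negative offsets use the SUB forms instead.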
2523
Keno Fischere70b31f2015-06-08 20:09:58 +00002524MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002525 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
Jonas Paulsson8e5b0c62016-05-10 08:09:37 +00002526 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2527 LiveIntervals *LIS) const {
Tim Northover3b0846e2014-05-24 12:50:23 +00002528 // This is a bit of a hack. Consider this instruction:
2529 //
2530 // %vreg0<def> = COPY %SP; GPR64all:%vreg0
2531 //
2532 // We explicitly chose GPR64all for the virtual register so such a copy might
2533 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2534 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
2535 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2536 //
2537 // To prevent that, we are going to constrain the %vreg0 register class here.
2538 //
2539 // <rdar://problem/11522048>
2540 //
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00002541 if (MI.isCopy()) {
2542 unsigned DstReg = MI.getOperand(0).getReg();
2543 unsigned SrcReg = MI.getOperand(1).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00002544 if (SrcReg == AArch64::SP &&
2545 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2546 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2547 return nullptr;
2548 }
2549 if (DstReg == AArch64::SP &&
2550 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2551 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2552 return nullptr;
2553 }
2554 }
2555
2556 // Cannot fold.
2557 return nullptr;
2558}
2559
2560int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
2561 bool *OutUseUnscaledOp,
2562 unsigned *OutUnscaledOp,
2563 int *EmittableOffset) {
2564 int Scale = 1;
2565 bool IsSigned = false;
2566 // The ImmIdx should be changed case by case if it is not 2.
2567 unsigned ImmIdx = 2;
2568 unsigned UnscaledOp = 0;
2569 // Set output values in case of early exit.
2570 if (EmittableOffset)
2571 *EmittableOffset = 0;
2572 if (OutUseUnscaledOp)
2573 *OutUseUnscaledOp = false;
2574 if (OutUnscaledOp)
2575 *OutUnscaledOp = 0;
2576 switch (MI.getOpcode()) {
2577 default:
Craig Topper2a30d782014-06-18 05:05:13 +00002578    llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
Tim Northover3b0846e2014-05-24 12:50:23 +00002579 // Vector spills/fills can't take an immediate offset.
2580 case AArch64::LD1Twov2d:
2581 case AArch64::LD1Threev2d:
2582 case AArch64::LD1Fourv2d:
2583 case AArch64::LD1Twov1d:
2584 case AArch64::LD1Threev1d:
2585 case AArch64::LD1Fourv1d:
2586 case AArch64::ST1Twov2d:
2587 case AArch64::ST1Threev2d:
2588 case AArch64::ST1Fourv2d:
2589 case AArch64::ST1Twov1d:
2590 case AArch64::ST1Threev1d:
2591 case AArch64::ST1Fourv1d:
2592 return AArch64FrameOffsetCannotUpdate;
2593 case AArch64::PRFMui:
2594 Scale = 8;
2595 UnscaledOp = AArch64::PRFUMi;
2596 break;
2597 case AArch64::LDRXui:
2598 Scale = 8;
2599 UnscaledOp = AArch64::LDURXi;
2600 break;
2601 case AArch64::LDRWui:
2602 Scale = 4;
2603 UnscaledOp = AArch64::LDURWi;
2604 break;
2605 case AArch64::LDRBui:
2606 Scale = 1;
2607 UnscaledOp = AArch64::LDURBi;
2608 break;
2609 case AArch64::LDRHui:
2610 Scale = 2;
2611 UnscaledOp = AArch64::LDURHi;
2612 break;
2613 case AArch64::LDRSui:
2614 Scale = 4;
2615 UnscaledOp = AArch64::LDURSi;
2616 break;
2617 case AArch64::LDRDui:
2618 Scale = 8;
2619 UnscaledOp = AArch64::LDURDi;
2620 break;
2621 case AArch64::LDRQui:
2622 Scale = 16;
2623 UnscaledOp = AArch64::LDURQi;
2624 break;
2625 case AArch64::LDRBBui:
2626 Scale = 1;
2627 UnscaledOp = AArch64::LDURBBi;
2628 break;
2629 case AArch64::LDRHHui:
2630 Scale = 2;
2631 UnscaledOp = AArch64::LDURHHi;
2632 break;
2633 case AArch64::LDRSBXui:
2634 Scale = 1;
2635 UnscaledOp = AArch64::LDURSBXi;
2636 break;
2637 case AArch64::LDRSBWui:
2638 Scale = 1;
2639 UnscaledOp = AArch64::LDURSBWi;
2640 break;
2641 case AArch64::LDRSHXui:
2642 Scale = 2;
2643 UnscaledOp = AArch64::LDURSHXi;
2644 break;
2645 case AArch64::LDRSHWui:
2646 Scale = 2;
2647 UnscaledOp = AArch64::LDURSHWi;
2648 break;
2649 case AArch64::LDRSWui:
2650 Scale = 4;
2651 UnscaledOp = AArch64::LDURSWi;
2652 break;
2653
2654 case AArch64::STRXui:
2655 Scale = 8;
2656 UnscaledOp = AArch64::STURXi;
2657 break;
2658 case AArch64::STRWui:
2659 Scale = 4;
2660 UnscaledOp = AArch64::STURWi;
2661 break;
2662 case AArch64::STRBui:
2663 Scale = 1;
2664 UnscaledOp = AArch64::STURBi;
2665 break;
2666 case AArch64::STRHui:
2667 Scale = 2;
2668 UnscaledOp = AArch64::STURHi;
2669 break;
2670 case AArch64::STRSui:
2671 Scale = 4;
2672 UnscaledOp = AArch64::STURSi;
2673 break;
2674 case AArch64::STRDui:
2675 Scale = 8;
2676 UnscaledOp = AArch64::STURDi;
2677 break;
2678 case AArch64::STRQui:
2679 Scale = 16;
2680 UnscaledOp = AArch64::STURQi;
2681 break;
2682 case AArch64::STRBBui:
2683 Scale = 1;
2684 UnscaledOp = AArch64::STURBBi;
2685 break;
2686 case AArch64::STRHHui:
2687 Scale = 2;
2688 UnscaledOp = AArch64::STURHHi;
2689 break;
2690
2691 case AArch64::LDPXi:
2692 case AArch64::LDPDi:
2693 case AArch64::STPXi:
2694 case AArch64::STPDi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002695 case AArch64::LDNPXi:
2696 case AArch64::LDNPDi:
2697 case AArch64::STNPXi:
2698 case AArch64::STNPDi:
2699 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002700 IsSigned = true;
2701 Scale = 8;
2702 break;
2703 case AArch64::LDPQi:
2704 case AArch64::STPQi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002705 case AArch64::LDNPQi:
2706 case AArch64::STNPQi:
2707 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002708 IsSigned = true;
2709 Scale = 16;
2710 break;
2711 case AArch64::LDPWi:
2712 case AArch64::LDPSi:
2713 case AArch64::STPWi:
2714 case AArch64::STPSi:
Ahmed Bougacha05541452015-09-10 01:54:43 +00002715 case AArch64::LDNPWi:
2716 case AArch64::LDNPSi:
2717 case AArch64::STNPWi:
2718 case AArch64::STNPSi:
2719 ImmIdx = 3;
Tim Northover3b0846e2014-05-24 12:50:23 +00002720 IsSigned = true;
2721 Scale = 4;
2722 break;
2723
2724 case AArch64::LDURXi:
2725 case AArch64::LDURWi:
2726 case AArch64::LDURBi:
2727 case AArch64::LDURHi:
2728 case AArch64::LDURSi:
2729 case AArch64::LDURDi:
2730 case AArch64::LDURQi:
2731 case AArch64::LDURHHi:
2732 case AArch64::LDURBBi:
2733 case AArch64::LDURSBXi:
2734 case AArch64::LDURSBWi:
2735 case AArch64::LDURSHXi:
2736 case AArch64::LDURSHWi:
2737 case AArch64::LDURSWi:
2738 case AArch64::STURXi:
2739 case AArch64::STURWi:
2740 case AArch64::STURBi:
2741 case AArch64::STURHi:
2742 case AArch64::STURSi:
2743 case AArch64::STURDi:
2744 case AArch64::STURQi:
2745 case AArch64::STURBBi:
2746 case AArch64::STURHHi:
2747 Scale = 1;
2748 break;
2749 }
2750
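  // Fold the instruction's current immediate (expressed in units of Scale)
  // into the incoming byte offset.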
  Offset += MI.getOperand(ImmIdx).getImm() * Scale;

  bool useUnscaledOp = false;
  // If the offset doesn't match the scale, we rewrite the instruction to
  // use the unscaled instruction instead. Likewise if we have a negative
  // offset and there is an unscaled op to use.
  if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
    useUnscaledOp = true;

  // Pick the width of the immediate field: 7 bits (signed, scaled) for the
  // paired accesses, 9 bits (signed, unscaled) when falling back to an
  // unscaled op (or when the instruction already uses one), and 12 bits
  // (unsigned, scaled) otherwise.
  unsigned MaskBits;
  if (IsSigned) {
    // ldp/stp instructions.
    MaskBits = 7;
    Offset /= Scale;
  } else if (UnscaledOp == 0 || useUnscaledOp) {
    MaskBits = 9;
    IsSigned = true;
    Scale = 1;
  } else {
    MaskBits = 12;
    IsSigned = false;
    Offset /= Scale;
  }

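  // Illustrative example: for LDRXui (Scale == 8), a byte offset of 40 stays
  // scaled, since 40 / 8 == 5 fits the unsigned 12-bit field; a byte offset
  // of 12 is not a multiple of 8, so the load is rewritten to LDURXi and 12
  // must instead fit the signed 9-bit unscaled field.
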
  // Attempt to fold address computation.
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
  if (Offset >= MinOff && Offset <= MaxOff) {
    if (EmittableOffset)
      *EmittableOffset = Offset;
    Offset = 0;
  } else {
    int NewOff = Offset < 0 ? MinOff : MaxOff;
    if (EmittableOffset)
      *EmittableOffset = NewOff;
    Offset = (Offset - NewOff) * Scale;
  }
  if (OutUseUnscaledOp)
    *OutUseUnscaledOp = useUnscaledOp;
  if (OutUnscaledOp)
    *OutUnscaledOp = UnscaledOp;
  return AArch64FrameOffsetCanUpdate |
         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}

bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                    unsigned FrameReg, int &Offset,
                                    const AArch64InstrInfo *TII) {
  unsigned Opcode = MI.getOpcode();
  unsigned ImmIdx = FrameRegIdx + 1;

  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
    Offset += MI.getOperand(ImmIdx).getImm();
    emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
                    MI.getOperand(0).getReg(), FrameReg, Offset, TII,
                    MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
    MI.eraseFromParent();
    Offset = 0;
    return true;
  }

  int NewOffset;
  unsigned UnscaledOp;
  bool UseUnscaledOp;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &NewOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    if (Status & AArch64FrameOffsetIsLegal)
      // Replace the FrameIndex with FrameReg.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
    if (UseUnscaledOp)
      MI.setDesc(TII->get(UnscaledOp));

    MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
    return Offset == 0;
  }

  return false;
}
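
// For example (illustrative), rewriting the frame-index store
//   STRXui %x0, <fi#0>, 2
// against frame register fp with an incoming byte offset of 16 yields
//   STRXui %x0, %fp, 4      // scaled immediate: (16 + 2 * 8) / 8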

void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(AArch64::HINT);
  NopInst.addOperand(MCOperand::createImm(0));
}

// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const {
  return true;
}
//
// True when Opc sets the NZCV flags
static bool isCombineInstrSettingFlag(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSWrr:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}
//
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDWrr:
  case AArch64::ADDWri:
  case AArch64::SUBWrr:
  case AArch64::ADDSWrr:
  case AArch64::ADDSWri:
  case AArch64::SUBSWrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBWri:
  case AArch64::SUBSWri:
    return true;
  default:
    break;
  }
  return false;
}
//
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
  switch (Opc) {
  case AArch64::ADDXrr:
  case AArch64::ADDXri:
  case AArch64::SUBXrr:
  case AArch64::ADDSXrr:
  case AArch64::ADDSXri:
  case AArch64::SUBSXrr:
  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
  case AArch64::SUBXri:
  case AArch64::SUBSXri:
    return true;
  default:
    break;
  }
  return false;
}
//
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
  switch (Inst.getOpcode()) {
  case AArch64::FADDSrr:
  case AArch64::FADDDrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FSUBSrr:
  case AArch64::FSUBDrr:
  case AArch64::FSUBv2f32:
  case AArch64::FSUBv2f64:
  case AArch64::FSUBv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    break;
  }
  return false;
}
//
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}

//
// Utility routine that checks if \param MO is defined by a
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc, unsigned ZeroReg = 0,
                       bool CheckZeroReg = false) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
    return false;
  // It must only be used by the instruction we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  if (CheckZeroReg) {
    assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
           MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
    // The third input reg must be zero.
    if (MI->getOperand(3).getReg() != ZeroReg)
      return false;
  }

  return true;
}

//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                              unsigned MulOpc, unsigned ZeroReg) {
  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}

//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
                               unsigned MulOpc) {
  return canCombine(MBB, MO, MulOpc);
}

// TODO: There are many more machine instruction opcodes to match:
//       1. Other data types (integer, vectors)
//       2. Other math / logic operations (xor, or)
//       3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
    const MachineInstr &Inst) const {
  switch (Inst.getOpcode()) {
  case AArch64::FADDDrr:
  case AArch64::FADDSrr:
  case AArch64::FADDv2f32:
  case AArch64::FADDv2f64:
  case AArch64::FADDv4f32:
  case AArch64::FMULDrr:
  case AArch64::FMULSrr:
  case AArch64::FMULX32:
  case AArch64::FMULX64:
  case AArch64::FMULXv2f32:
  case AArch64::FMULXv2f64:
  case AArch64::FMULXv4f32:
  case AArch64::FMULv2f32:
  case AArch64::FMULv2f64:
  case AArch64::FMULv4f32:
    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
  default:
    return false;
  }
}

/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  unsigned Opc = Root.getOpcode();
  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  if (!isCombineInstrCandidate(Opc))
    return false;
  if (isCombineInstrSettingFlag(Opc)) {
    int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
    // When NZCV is live, bail out.
    if (Cmp_NZCV == -1)
      return false;
    unsigned NewOpc = convertFlagSettingOpcode(Root);
    // When the opcode can't change, bail out.
    // CHECKME: do we miss any cases for opcode conversion?
    if (NewOpc == Opc)
      return false;
    Opc = NewOpc;
  }

  switch (Opc) {
  default:
    break;
  case AArch64::ADDWrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "ADDWrr does not have register operands");
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBWrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
      Found = true;
    }
    break;
  case AArch64::SUBXrr:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
      Found = true;
    }
    if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
      Found = true;
    }
    break;
  case AArch64::ADDWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
      Found = true;
    }
    break;
  case AArch64::ADDXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBWri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
                          AArch64::WZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
      Found = true;
    }
    break;
  case AArch64::SUBXri:
    if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
                          AArch64::XZR)) {
      Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
      Found = true;
    }
    break;
  }
  return Found;
}
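
// Illustrative rewrite enabled by the patterns above (hypothetical
// registers):
//   mul w8, w0, w1
//   add w2, w8, w2
// becomes
//   madd w2, w0, w1, w2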
/// Floating-Point Support

/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
                           SmallVectorImpl<MachineCombinerPattern> &Patterns) {
  if (!isCombineInstrCandidateFP(Root))
    return false;

  MachineBasicBlock &MBB = *Root.getParent();
  bool Found = false;

  switch (Root.getOpcode()) {
  default:
    assert(false && "Unsupported FP instruction in combiner\n");
    break;
  case AArch64::FADDSrr:
    assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
           "FADDSrr does not have register operands");
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FADDv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(1),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
      Found = true;
    }
    break;

  case AArch64::FSUBSrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBDrr:
    if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
      Found = true;
    }
    if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
      Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv1i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv2f64:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv2i64_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv2f64)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
      Found = true;
    }
    break;
  case AArch64::FSUBv4f32:
    if (canCombineWithFMUL(MBB, Root.getOperand(2),
                           AArch64::FMULv4i32_indexed)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
      Found = true;
    } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
                                  AArch64::FMULv4f32)) {
      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
      Found = true;
    }
    break;
  }
  return Found;
}
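
// Illustrative rewrite (hypothetical registers; only done under
// unsafe-fp-math, see isCombineInstrCandidateFP):
//   fmul s8, s0, s1
//   fadd s2, s8, s2
// becomes
//   fmadd s2, s0, s1, s2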

/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool
AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
  switch (Pattern) {
  default:
    break;
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULADDD_OP1:
  case MachineCombinerPattern::FMULADDD_OP2:
  case MachineCombinerPattern::FMULSUBD_OP1:
  case MachineCombinerPattern::FMULSUBD_OP2:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP2:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP2:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLSv4f32_OP2:
    return true;
  } // end switch (Pattern)
  return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root,
    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
  // Integer patterns
  if (getMaddPatterns(Root, Patterns))
    return true;
  // Floating point patterns
  if (getFMAPatterns(Root, Patterns))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}

enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
///  F|MUL I=A,B,0
///  F|ADD R,I,C
///  ==> F|MADD R,A,B,C
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
                 const TargetInstrInfo *TII, MachineInstr &Root,
                 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
                 unsigned MaddOpc, const TargetRegisterClass *RC,
                 FMAInstKind kind = FMAInstKind::Default) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();
  unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
  bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
    MRI.constrainRegClass(SrcReg2, RC);

  MachineInstrBuilder MIB;
  if (kind == FMAInstKind::Default)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addReg(SrcReg2, getKillRegState(Src2IsKill));
  else if (kind == FMAInstKind::Indexed)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill))
              .addImm(MUL->getOperand(3).getImm());
  else if (kind == FMAInstKind::Accumulator)
    MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
              .addReg(SrcReg2, getKillRegState(Src2IsKill))
              .addReg(SrcReg0, getKillRegState(Src0IsKill))
              .addReg(SrcReg1, getKillRegState(Src1IsKill));
  else
    assert(false && "Invalid FMA instruction kind\n");
  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
  InsInstrs.push_back(MIB);
  return MUL;
}
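
// Note on operand order (illustrative): the scalar MADD/FMADD forms take the
// two multiply operands first and the accumulator last, while the FMLA/FMLS
// vector and indexed forms take the accumulator first (it is tied to the
// destination), followed by the multiply operands.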

/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
///  MUL I=A,B,0
///  ADD R,I,Imm
///  ==> ORR  V, ZR, Imm
///  ==> MADD R,A,B,V
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                              const TargetInstrInfo *TII, MachineInstr &Root,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
                              unsigned IdxMulOpd, unsigned MaddOpc,
                              unsigned VR, const TargetRegisterClass *RC) {
  assert(IdxMulOpd == 1 || IdxMulOpd == 2);

  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
  unsigned ResultReg = Root.getOperand(0).getReg();
  unsigned SrcReg0 = MUL->getOperand(1).getReg();
  bool Src0IsKill = MUL->getOperand(1).isKill();
  unsigned SrcReg1 = MUL->getOperand(2).getReg();
  bool Src1IsKill = MUL->getOperand(2).isKill();

  if (TargetRegisterInfo::isVirtualRegister(ResultReg))
    MRI.constrainRegClass(ResultReg, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
    MRI.constrainRegClass(SrcReg0, RC);
  if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
    MRI.constrainRegClass(SrcReg1, RC);
  if (TargetRegisterInfo::isVirtualRegister(VR))
    MRI.constrainRegClass(VR, RC);

  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
                                    ResultReg)
                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
                                .addReg(VR);
  // Insert the MADD
  InsInstrs.push_back(MIB);
  return MUL;
}
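
// Illustrative result of genMaddR (hypothetical registers):
//   mul w8, w0, w1
//   add w2, w8, #16
// becomes
//   orr  w9, wzr, #16      // materialize the immediate
//   madd w2, w0, w1, w9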

/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  // MUL stays null when no replacement could be generated (e.g. when the
  // immediate cannot be encoded below).
  MachineInstr *MUL = nullptr;
  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (Pattern) {
  default:
    // Reassociate instructions.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    return;
  case MachineCombinerPattern::MULADDW_OP1:
  case MachineCombinerPattern::MULADDX_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDW_OP2:
  case MachineCombinerPattern::MULADDX_OP2:
    // MUL I=A,B,0
    // ADD R,C,I
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULADDWI_OP1:
  case MachineCombinerPattern::MULADDXI_OP1: {
    // MUL I=A,B,0
    // ADD R,I,Imm
    // ==> ORR  V, ZR, Imm
    // ==> MADD R,A,B,V
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();

    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP1:
  case MachineCombinerPattern::MULSUBX_OP1: {
    // MUL I=A,B,0
    // SUB R,I, C
    // ==> SUB  V, 0, C
    // ==> MADD R,A,B,V // = -C + A*B
    // --- Create(MADD);
    const TargetRegisterClass *SubRC;
    unsigned SubOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
      SubOpc = AArch64::SUBWrr;
      SubRC = &AArch64::GPR32spRegClass;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      SubOpc = AArch64::SUBXrr;
      SubRC = &AArch64::GPR64spRegClass;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(SubRC);
    // SUB NewVR, 0, C
    MachineInstrBuilder MIB1 =
        BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
            .addReg(ZeroReg)
            .addOperand(Root.getOperand(2));
    InsInstrs.push_back(MIB1);
    InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
    MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    break;
  }
  case MachineCombinerPattern::MULSUBW_OP2:
  case MachineCombinerPattern::MULSUBX_OP2:
    // MUL I=A,B,0
    // SUB R,C,I
    // ==> MSUB R,A,B,C (computes C - A*B)
    // --- Create(MSUB);
    if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
      Opc = AArch64::MSUBWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      Opc = AArch64::MSUBXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  case MachineCombinerPattern::MULSUBWI_OP1:
  case MachineCombinerPattern::MULSUBXI_OP1: {
    // MUL I=A,B,0
    // SUB R,I, Imm
    // ==> ORR  V, ZR, -Imm
    // ==> MADD R,A,B,V // = -Imm + A*B
    // --- Create(MADD);
    const TargetRegisterClass *OrrRC;
    unsigned BitSize, OrrOpc, ZeroReg;
    if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
      OrrOpc = AArch64::ORRWri;
      OrrRC = &AArch64::GPR32spRegClass;
      BitSize = 32;
      ZeroReg = AArch64::WZR;
      Opc = AArch64::MADDWrrr;
      RC = &AArch64::GPR32RegClass;
    } else {
      OrrOpc = AArch64::ORRXri;
      OrrRC = &AArch64::GPR64spRegClass;
      BitSize = 64;
      ZeroReg = AArch64::XZR;
      Opc = AArch64::MADDXrrr;
      RC = &AArch64::GPR64RegClass;
    }
    unsigned NewVR = MRI.createVirtualRegister(OrrRC);
    uint64_t Imm = Root.getOperand(2).getImm();
    if (Root.getOperand(3).isImm()) {
      unsigned Val = Root.getOperand(3).getImm();
      Imm = Imm << Val;
    }
    uint64_t UImm = SignExtend64(-Imm, BitSize);
    uint64_t Encoding;
    if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
      MachineInstrBuilder MIB1 =
          BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
              .addReg(ZeroReg)
              .addImm(Encoding);
      InsInstrs.push_back(MIB1);
      InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
      MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
    }
    break;
  }
  // Floating Point Support
  case MachineCombinerPattern::FMULADDS_OP1:
  case MachineCombinerPattern::FMULADDD_OP1:
    // MUL I=A,B,0
    // ADD R,I,C
    // ==> MADD R,A,B,C
    // --- Create(MADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  case MachineCombinerPattern::FMULADDS_OP2:
  case MachineCombinerPattern::FMULADDD_OP2:
    // FMUL I=A,B,0
    // FADD R,C,I
    // ==> FMADD R,A,B,C
    // --- Create(FMADD);
    if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
      Opc = AArch64::FMADDSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMADDDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;

  case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
    Opc = AArch64::FMLAv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                           FMAInstKind::Indexed);
    break;
  case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
    Opc = AArch64::FMLAv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f32_OP1:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f32_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
      Opc = AArch64::FMLAv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
  case MachineCombinerPattern::FMLAv2f64_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
  case MachineCombinerPattern::FMLAv2f64_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
      Opc = AArch64::FMLAv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
  case MachineCombinerPattern::FMLAv4f32_OP1:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
  case MachineCombinerPattern::FMLAv4f32_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
      Opc = AArch64::FMLAv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLAv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMULSUBS_OP1:
  case MachineCombinerPattern::FMULSUBD_OP1: {
    // FMUL I=A,B,0
    // FSUB R,I,C
    // ==> FNMSUB R,A,B,C // = -C + A*B
    // --- Create(FNMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
      Opc = AArch64::FNMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FNMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
    break;
  }
  case MachineCombinerPattern::FMULSUBS_OP2:
  case MachineCombinerPattern::FMULSUBD_OP2: {
    // FMUL I=A,B,0
    // FSUB R,C,I
    // ==> FMSUB R,A,B,C (computes C - A*B)
    // --- Create(FMSUB);
    if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
      Opc = AArch64::FMSUBSrrr;
      RC = &AArch64::FPR32RegClass;
    } else {
      Opc = AArch64::FMSUBDrrr;
      RC = &AArch64::FPR64RegClass;
    }
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
    break;
  }

  case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
    Opc = AArch64::FMLSv1i32_indexed;
    RC = &AArch64::FPR32RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
    Opc = AArch64::FMLSv1i64_indexed;
    RC = &AArch64::FPR64RegClass;
    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                           FMAInstKind::Indexed);
    break;

  case MachineCombinerPattern::FMLSv2f32_OP2:
  case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
    RC = &AArch64::FPR64RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
      Opc = AArch64::FMLSv2i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv2f64_OP2:
  case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
      Opc = AArch64::FMLSv2i64_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv2f64;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;

  case MachineCombinerPattern::FMLSv4f32_OP2:
  case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
    RC = &AArch64::FPR128RegClass;
    if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
      Opc = AArch64::FMLSv4i32_indexed;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Indexed);
    } else {
      Opc = AArch64::FMLSv4f32;
      MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
                             FMAInstKind::Accumulator);
    }
    break;
  } // end switch (Pattern)
  // Record MUL and ADD/SUB for deletion. MUL may have been left null above
  // when the immediate could not be encoded and no replacement was generated.
  if (MUL)
    DelInstrs.push_back(MUL);
  DelInstrs.push_back(&Root);

  return;
}

/// \brief Replace csinc-branch sequence by simple conditional branch
///
/// Examples:
/// 1.
///  csinc w9, wzr, wzr, <condition code>
///  tbnz w9, #0, 0x44
/// to
///  b.<inverted condition code>
///
/// 2.
///  csinc w9, wzr, wzr, <condition code>
///  tbz w9, #0, 0x44
/// to
///  b.<condition code>
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is power of 2.
///
/// Examples:
///  and  w8, w8, #0x400
///  cbnz w8, L1
/// to
///  tbnz w8, #10, L1
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  bool IsNegativeBranch = false;
  bool IsTestAndBranch = false;
  unsigned TargetBBInMI = 0;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown branch instruction?");
  case AArch64::Bcc:
    return false;
  case AArch64::CBZW:
  case AArch64::CBZX:
    TargetBBInMI = 1;
    break;
  case AArch64::CBNZW:
  case AArch64::CBNZX:
    TargetBBInMI = 1;
    IsNegativeBranch = true;
    break;
  case AArch64::TBZW:
  case AArch64::TBZX:
    TargetBBInMI = 2;
    IsTestAndBranch = true;
    break;
  case AArch64::TBNZW:
  case AArch64::TBNZX:
    TargetBBInMI = 2;
    IsNegativeBranch = true;
    IsTestAndBranch = true;
    break;
  }
  // So we increment a zero register and test for bits other
  // than bit 0? Conservatively bail out in case the verifier
  // missed this case.
  if (IsTestAndBranch && MI.getOperand(1).getImm())
    return false;

  // Find Definition.
  assert(MI.getParent() && "Incomplete machine instruction\n");
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  unsigned VReg = MI.getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(VReg))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(VReg);

  // Look through COPY instructions to find definition.
  while (DefMI->isCopy()) {
    unsigned CopyVReg = DefMI->getOperand(1).getReg();
    if (!MRI->hasOneNonDBGUse(CopyVReg))
      return false;
    if (!MRI->hasOneDef(CopyVReg))
      return false;
    DefMI = MRI->getVRegDef(CopyVReg);
  }

  switch (DefMI->getOpcode()) {
  default:
    return false;
  // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
  case AArch64::ANDWri:
  case AArch64::ANDXri: {
    if (IsTestAndBranch)
      return false;
    if (DefMI->getParent() != MBB)
      return false;
    if (!MRI->hasOneNonDBGUse(VReg))
      return false;

    bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
    if (!isPowerOf2_64(Mask))
      return false;

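    // Illustrative rewrite (hypothetical registers):
    //   and  w8, w0, #0x10
    //   cbnz w8, L1
    // becomes
    //   tbnz w0, #4, L1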
    MachineOperand &MO = DefMI->getOperand(1);
    unsigned NewReg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(NewReg))
      return false;

    assert(!MRI->def_empty(NewReg) && "Register must be defined.");

    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    unsigned Imm = Log2_64(Mask);
    unsigned Opc = (Imm < 32)
                       ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
                       : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
    MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
                              .addReg(NewReg)
                              .addImm(Imm)
                              .addMBB(TBB);
    // The AND's source register now lives on to the new TBZ/TBNZ.
    MO.setIsKill(false);

    // For immediates smaller than 32, we need to use the 32-bit
    // variant (W) in all cases. Indeed the 64-bit variant does not
    // allow encoding them.
    // Therefore, if the input register is 64-bit, we need to take the
    // 32-bit sub-part.
    if (!Is32Bit && Imm < 32)
      NewMI->getOperand(0).setSubReg(AArch64::sub_32);
    MI.eraseFromParent();
    return true;
  }
  // Look for CSINC
  case AArch64::CSINCWr:
  case AArch64::CSINCXr: {
    if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
          DefMI->getOperand(2).getReg() == AArch64::WZR) &&
        !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
          DefMI->getOperand(2).getReg() == AArch64::XZR))
      return false;

    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
      return false;

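    // With both sources being the zero register, CSINC materializes
    // (cond ? 0 : 1): branching on bit 0 being set (tbnz/cbnz) is
    // b.<inverted cond>, and branching on it being clear (tbz/cbz) is
    // b.<cond>.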
    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
    // Convert only when the condition code is not modified between
    // the CSINC and the branch. The CC may be used by other
    // instructions in between.
    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
      return false;
    MachineBasicBlock &RefToMBB = *MBB;
    MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
    DebugLoc DL = MI.getDebugLoc();
    if (IsNegativeBranch)
      CC = AArch64CC::getInvertedCondCode(CC);
    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
    MI.eraseFromParent();
    return true;
  }
  }
}

std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = AArch64II::MO_FRAGMENT;
  return std::make_pair(TF & Mask, TF & ~Mask);
}
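
// For example (illustrative), a target flag of MO_PAGEOFF | MO_NC decomposes
// into the direct flag MO_PAGEOFF and the bitmask flag MO_NC.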

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PAGE, "aarch64-page"},
      {MO_PAGEOFF, "aarch64-pageoff"},
      {MO_G3, "aarch64-g3"},
      {MO_G2, "aarch64-g2"},
      {MO_G1, "aarch64-g1"},
      {MO_G0, "aarch64-g0"},
      {MO_HI12, "aarch64-hi12"}};
  return makeArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace AArch64II;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_GOT, "aarch64-got"},
      {MO_NC, "aarch64-nc"},
      {MO_TLS, "aarch64-tls"}};
  return makeArrayRef(TargetFlags);
}