//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<int> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

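/// Select a COPY (or COPY-like generic instruction, e.g. a bitcast) by
/// constraining its destination to a register class derived from the
/// destination's register bank and size, inserting a subregister copy or a
/// SUBREG_TO_REG when the source and destination banks or sizes differ.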
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't
  // want to verify that the src and dst are the same size, since that's
  // handled by the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same
        // bank as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      } else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
                 SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

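/// Select the AArch64 opcode for the scalar FP <-> integer conversion
/// \p GenericOpc (G_SITOFP, G_UITOFP, G_FPTOSI or G_FPTOUI), given the
/// destination and source types \p DstTy and \p SrcTy.
/// \returns \p GenericOpc if the combination is unsupported.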
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

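/// Map an integer-compare predicate to the corresponding AArch64 condition
/// code (e.g. ICMP_SGT -> GT, ICMP_ULT -> LO).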
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

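/// Map a floating-point compare predicate to one or two AArch64 condition
/// codes. Predicates that need two branches (e.g. FCMP_ONE, FCMP_UEQ) also
/// set \p CondCode2; otherwise \p CondCode2 is left as AL.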
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

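/// Try to fold a G_ICMP-against-zero feeding a G_BRCOND into a single
/// CBZ/CBNZ (32- or 64-bit), e.g.:
///
///   %c:gpr(s1) = G_ICMP intpred(eq), %x(s64), 0
///   G_BRCOND %c(s1), %bb.1      -->      CBZX %x, %bb.1
///
/// Returns false if the pattern doesn't match, so the generic G_BRCOND
/// handling can take over.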
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

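/// Lower G_VASTART for the Darwin ABI, where va_list is a single pointer:
/// compute the address of the variadic argument area from its frame index
/// (ADDXri) and store it through the va_list pointer (STRXui).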
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

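/// Materialize \p V for the large code model as a MOVZ plus three MOVKs,
/// filling in 16 bits of the address at a time. The emitted sequence roughly
/// corresponds to (relocation spelling illustrative):
///
///   movz xN, #:abs_g0_nc:V
///   movk xN, #:abs_g1_nc:V, lsl #16
///   movk xN, #:abs_g2_nc:V, lsl #32
///   movk xN, #:abs_g3:V, lsl #48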
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    I.setDesc(TII.get(MovOpc));

    if (isFP) {
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // allocas and G_FRAME_INDEX are only supported in addrspace(0).
    if (Ty != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }
    I.setDesc(TII.get(AArch64::ADDXri));

    // MOs for a #0 shifted immediate.
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal()) {
      // FIXME: we don't support TLS yet.
      return false;
    }
    unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
    if (OpFlags & AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large) {
      // Materialize the global using movz/movk instructions.
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      return true;
    } else if (TM.getCodeModel() == CodeModel::Tiny) {
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else {
      I.setDesc(TII.get(AArch64::MOVaddr));
      I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
      MachineInstrBuilder MIB(MF, I);
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
                           OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    LLT PtrTy = MRI.getType(I.getOperand(1).getReg());

    if (PtrTy != LLT::pointer(0, 64)) {
      LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
                        << ", expected: " << LLT::pointer(0, 64) << '\n');
      return false;
    }

    auto &MemOp = **I.memoperands_begin();
    if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
      LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
      return false;
    }
    unsigned MemSizeInBits = MemOp.getSize() * 8;

    const unsigned PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Sanity-check the pointer register.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const unsigned ValReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    const unsigned NewOpc =
        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));

    uint64_t Offset = 0;
    auto *PtrMI = MRI.getVRegDef(PtrReg);

    // Try to fold a GEP into our unsigned immediate addressing mode.
    if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
        int64_t Imm = *COff;
        const unsigned Size = MemSizeInBits / 8;
        const unsigned Scale = Log2_32(Size);
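        // The unsigned-immediate (LDR*/STR*ui) forms encode a 12-bit offset
        // scaled by the access size, so the fold only applies when the offset
        // is a non-negative, size-aligned value below 4096 * Size.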
1241 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1242 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1243 I.getOperand(1).setReg(Ptr2Reg);
1244 PtrMI = MRI.getVRegDef(Ptr2Reg);
1245 Offset = Imm / Size;
1246 }
1247 }
1248 }
1249
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001250 // If we haven't folded anything into our addressing mode yet, try to fold
1251 // a frame index into the base+offset.
1252 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1253 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1254
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001255 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001256
1257 // If we're storing a 0, use WZR/XZR.
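    // Using the zero register avoids materializing a zero into a GPR just for
    // the store.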
1258 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1259 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1260 if (I.getOpcode() == AArch64::STRWui)
1261 I.getOperand(0).setReg(AArch64::WZR);
1262 else if (I.getOpcode() == AArch64::STRXui)
1263 I.getOperand(0).setReg(AArch64::XZR);
1264 }
1265 }
1266
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001267 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1268 }
1269
Tim Northover9dd78f82017-02-08 21:22:25 +00001270 case TargetOpcode::G_SMULH:
1271 case TargetOpcode::G_UMULH: {
1272 // Reject the various things we don't support yet.
1273 if (unsupportedBinOp(I, RBI, MRI, TRI))
1274 return false;
1275
1276 const unsigned DefReg = I.getOperand(0).getReg();
1277 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1278
1279 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001280 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001281 return false;
1282 }
1283
1284 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001285 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1286 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001287 return false;
1288 }
1289
1290 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1291 : AArch64::UMULHrr;
1292 I.setDesc(TII.get(NewOpc));
1293
1294 // Now that we selected an opcode, we need to constrain the register
1295 // operands to use appropriate classes.
1296 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1297 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001298 case TargetOpcode::G_FADD:
1299 case TargetOpcode::G_FSUB:
1300 case TargetOpcode::G_FMUL:
1301 case TargetOpcode::G_FDIV:
1302
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001303 case TargetOpcode::G_OR:
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001304 case TargetOpcode::G_SHL:
1305 case TargetOpcode::G_LSHR:
1306 case TargetOpcode::G_ASHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001307 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001308 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001309 if (unsupportedBinOp(I, RBI, MRI, TRI))
1310 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001311
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001312 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001313
1314 const unsigned DefReg = I.getOperand(0).getReg();
1315 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1316
1317 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1318 if (NewOpc == I.getOpcode())
1319 return false;
1320
1321 I.setDesc(TII.get(NewOpc));
1322    // FIXME: Should the type always be reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001323
1324 // Now that we selected an opcode, we need to constrain the register
1325 // operands to use appropriate classes.
1326 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1327 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001328
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001329 case TargetOpcode::G_UADDO: {
1330 // TODO: Support other types.
1331 unsigned OpSize = Ty.getSizeInBits();
1332 if (OpSize != 32 && OpSize != 64) {
1333 LLVM_DEBUG(
1334 dbgs()
1335            << "G_UADDO currently only supported for 32 and 64 bit types.\n");
1336 return false;
1337 }
1338
1339 // TODO: Support vectors.
1340 if (Ty.isVector()) {
1341 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1342 return false;
1343 }
1344
1345    // Emit an add and set the condition flags.
1346 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1347 MachineIRBuilder MIRBuilder(I);
1348 auto AddsMI = MIRBuilder.buildInstr(
1349 AddsOpc, {I.getOperand(0).getReg()},
1350 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1351 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1352
1353    // Now, put the overflow result in the register given by the carry-out
1354    // (second) operand of the G_UADDO. CSINC increments the result when the
1355    // predicate is false, so to get the increment when it's true, we need to
1356    // use the inverse. In this case, we want to increment when carry is set.
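    // Roughly, for s32 the emitted sequence is:
    //   %res   = ADDSWrr %lhs, %rhs        (also defines NZCV)
    //   %carry = CSINCWr %wzr, %wzr, lo    (1 exactly when carry/hs is set)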
1357 auto CsetMI = MIRBuilder
1358 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1359 {AArch64::WZR, AArch64::WZR})
1360 .addImm(getInvertedCondCode(AArch64CC::HS));
1361 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1362 I.eraseFromParent();
1363 return true;
1364 }
1365
Tim Northover398c5f52017-02-14 20:56:29 +00001366 case TargetOpcode::G_PTR_MASK: {
1367 uint64_t Align = I.getOperand(2).getImm();
1368 if (Align >= 64 || Align == 0)
1369 return false;
1370
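    // The immediate is the number of low bits to clear, e.g. an immediate of 4
    // gives Mask == 0xFFFFFFFFFFFFFFF0; masks of this shape are always
    // encodable as an ANDXri logical immediate.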
1371 uint64_t Mask = ~((1ULL << Align) - 1);
1372 I.setDesc(TII.get(AArch64::ANDXri));
1373 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1374
1375 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1376 }
Tim Northover037af52c2016-10-31 18:31:09 +00001377 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001378 case TargetOpcode::G_TRUNC: {
1379 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1380 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1381
1382 const unsigned DstReg = I.getOperand(0).getReg();
1383 const unsigned SrcReg = I.getOperand(1).getReg();
1384
1385 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1386 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1387
1388 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001389 LLVM_DEBUG(
1390 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001391 return false;
1392 }
1393
1394 if (DstRB.getID() == AArch64::GPRRegBankID) {
1395 const TargetRegisterClass *DstRC =
1396 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1397 if (!DstRC)
1398 return false;
1399
1400 const TargetRegisterClass *SrcRC =
1401 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1402 if (!SrcRC)
1403 return false;
1404
1405 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1406 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001407 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001408 return false;
1409 }
1410
1411 if (DstRC == SrcRC) {
1412 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001413 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1414 SrcTy == LLT::scalar(64)) {
1415 llvm_unreachable("TableGen can import this case");
1416 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001417 } else if (DstRC == &AArch64::GPR32RegClass &&
1418 SrcRC == &AArch64::GPR64RegClass) {
1419 I.getOperand(1).setSubReg(AArch64::sub_32);
1420 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001421 LLVM_DEBUG(
1422 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001423 return false;
1424 }
1425
1426 I.setDesc(TII.get(TargetOpcode::COPY));
1427 return true;
1428 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1429 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1430 I.setDesc(TII.get(AArch64::XTNv4i16));
1431 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1432 return true;
1433 }
1434 }
1435
1436 return false;
1437 }
1438
Tim Northover3d38b3a2016-10-11 20:50:21 +00001439 case TargetOpcode::G_ANYEXT: {
1440 const unsigned DstReg = I.getOperand(0).getReg();
1441 const unsigned SrcReg = I.getOperand(1).getReg();
1442
Quentin Colombetcb629a82016-10-12 03:57:49 +00001443 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1444 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001445 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1446 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001447 return false;
1448 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001449
Quentin Colombetcb629a82016-10-12 03:57:49 +00001450 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1451 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001452 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1453 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001454 return false;
1455 }
1456
1457 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1458
1459 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001460 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001461 return false;
1462 }
1463
Quentin Colombetcb629a82016-10-12 03:57:49 +00001464 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001465 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1466 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001467 return false;
1468 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001469 // At this point G_ANYEXT is just like a plain COPY, but we need
1470    // to explicitly form the 64-bit value when the destination is 64 bits.
1471 if (DstSize > 32) {
1472 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1473 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1474 .addDef(ExtSrc)
1475 .addImm(0)
1476 .addUse(SrcReg)
1477 .addImm(AArch64::sub_32);
1478 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001479 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001480 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001481 }
1482
1483 case TargetOpcode::G_ZEXT:
1484 case TargetOpcode::G_SEXT: {
1485 unsigned Opcode = I.getOpcode();
1486 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1487 SrcTy = MRI.getType(I.getOperand(1).getReg());
1488 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1489 const unsigned DefReg = I.getOperand(0).getReg();
1490 const unsigned SrcReg = I.getOperand(1).getReg();
1491 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1492
1493 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001494 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1495 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001496 return false;
1497 }
1498
1499 MachineInstr *ExtI;
1500 if (DstTy == LLT::scalar(64)) {
1501 // FIXME: Can we avoid manually doing this?
1502 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001503 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1504 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001505 return false;
1506 }
1507
1508 const unsigned SrcXReg =
1509 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1510 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1511 .addDef(SrcXReg)
1512 .addImm(0)
1513 .addUse(SrcReg)
1514 .addImm(AArch64::sub_32);
1515
1516 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1517 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1518 .addDef(DefReg)
1519 .addUse(SrcXReg)
1520 .addImm(0)
1521 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001522 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001523 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1524 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1525 .addDef(DefReg)
1526 .addUse(SrcReg)
1527 .addImm(0)
1528 .addImm(SrcTy.getSizeInBits() - 1);
1529 } else {
1530 return false;
1531 }
1532
1533 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1534
1535 I.eraseFromParent();
1536 return true;
1537 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001538
Tim Northover69271c62016-10-12 22:49:11 +00001539 case TargetOpcode::G_SITOFP:
1540 case TargetOpcode::G_UITOFP:
1541 case TargetOpcode::G_FPTOSI:
1542 case TargetOpcode::G_FPTOUI: {
1543 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1544 SrcTy = MRI.getType(I.getOperand(1).getReg());
1545 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1546 if (NewOpc == Opcode)
1547 return false;
1548
1549 I.setDesc(TII.get(NewOpc));
1550 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1551
1552 return true;
1553 }
1554
1555
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001556 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001557 // The importer is currently unable to import pointer types since they
1558 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001559 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001560
Daniel Sandersedd07842017-08-17 09:26:14 +00001561 case TargetOpcode::G_BITCAST:
1562 // Imported SelectionDAG rules can handle every bitcast except those that
1563 // bitcast from a type to the same type. Ideally, these shouldn't occur
1564 // but we might not run an optimizer that deletes them.
1565 if (MRI.getType(I.getOperand(0).getReg()) ==
1566 MRI.getType(I.getOperand(1).getReg()))
1567 return selectCopy(I, TII, MRI, TRI, RBI);
1568 return false;
1569
Tim Northover9ac0eba2016-11-08 00:45:29 +00001570 case TargetOpcode::G_SELECT: {
1571 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001572 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1573 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001574 return false;
1575 }
1576
1577 const unsigned CondReg = I.getOperand(1).getReg();
1578 const unsigned TReg = I.getOperand(2).getReg();
1579 const unsigned FReg = I.getOperand(3).getReg();
1580
1581 unsigned CSelOpc = 0;
1582
1583 if (Ty == LLT::scalar(32)) {
1584 CSelOpc = AArch64::CSELWr;
Kristof Beylse9412b42017-01-19 13:32:14 +00001585 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
Tim Northover9ac0eba2016-11-08 00:45:29 +00001586 CSelOpc = AArch64::CSELXr;
1587 } else {
1588 return false;
1589 }
1590
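    // Test bit 0 of the condition and select on the resulting flags, roughly:
    //   ANDSWri %wzr, %cond, #0x1          (sets Z if bit 0 is clear)
    //   CSELWr/CSELXr %dst, %t, %f, ne     (picks %t when bit 0 was set)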
1591 MachineInstr &TstMI =
1592 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1593 .addDef(AArch64::WZR)
1594 .addUse(CondReg)
1595 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1596
1597 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1598 .addDef(I.getOperand(0).getReg())
1599 .addUse(TReg)
1600 .addUse(FReg)
1601 .addImm(AArch64CC::NE);
1602
1603 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1604 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1605
1606 I.eraseFromParent();
1607 return true;
1608 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001609 case TargetOpcode::G_ICMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001610 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001611 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1612 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001613 return false;
1614 }
1615
1616 unsigned CmpOpc = 0;
1617 unsigned ZReg = 0;
1618
1619 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1620 if (CmpTy == LLT::scalar(32)) {
1621 CmpOpc = AArch64::SUBSWrr;
1622 ZReg = AArch64::WZR;
1623 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1624 CmpOpc = AArch64::SUBSXrr;
1625 ZReg = AArch64::XZR;
1626 } else {
1627 return false;
1628 }
1629
Kristof Beyls22524402017-01-05 10:16:08 +00001630 // CSINC increments the result by one when the condition code is false.
1631 // Therefore, we have to invert the predicate to get an increment by 1 when
1632 // the predicate is true.
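    // For example, an eq compare becomes:
    //   SUBSWrr %wzr, %lhs, %rhs
    //   CSINCWr %dst, %wzr, %wzr, ne       (1 exactly when %lhs == %rhs)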
1633 const AArch64CC::CondCode invCC =
1634 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1635 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
Tim Northover6c02ad52016-10-12 22:49:04 +00001636
1637 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1638 .addDef(ZReg)
1639 .addUse(I.getOperand(2).getReg())
1640 .addUse(I.getOperand(3).getReg());
1641
1642 MachineInstr &CSetMI =
1643 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1644 .addDef(I.getOperand(0).getReg())
1645 .addUse(AArch64::WZR)
1646 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001647 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001648
1649 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1650 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1651
1652 I.eraseFromParent();
1653 return true;
1654 }
1655
Tim Northover7dd378d2016-10-12 22:49:07 +00001656 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001657 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001658 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1659 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001660 return false;
1661 }
1662
1663 unsigned CmpOpc = 0;
1664 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1665 if (CmpTy == LLT::scalar(32)) {
1666 CmpOpc = AArch64::FCMPSrr;
1667 } else if (CmpTy == LLT::scalar(64)) {
1668 CmpOpc = AArch64::FCMPDrr;
1669 } else {
1670 return false;
1671 }
1672
1673 // FIXME: regbank
1674
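    // Some FP predicates need two condition codes (e.g. one -> mi or gt,
    // ueq -> eq or vs); when CC2 != AL a second CSINC is emitted below and the
    // two results are ORed together.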
1675 AArch64CC::CondCode CC1, CC2;
1676 changeFCMPPredToAArch64CC(
1677 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1678
1679 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1680 .addUse(I.getOperand(2).getReg())
1681 .addUse(I.getOperand(3).getReg());
1682
1683 const unsigned DefReg = I.getOperand(0).getReg();
1684 unsigned Def1Reg = DefReg;
1685 if (CC2 != AArch64CC::AL)
1686 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1687
1688 MachineInstr &CSetMI =
1689 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1690 .addDef(Def1Reg)
1691 .addUse(AArch64::WZR)
1692 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001693 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001694
1695 if (CC2 != AArch64CC::AL) {
1696 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1697 MachineInstr &CSet2MI =
1698 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1699 .addDef(Def2Reg)
1700 .addUse(AArch64::WZR)
1701 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001702 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001703 MachineInstr &OrMI =
1704 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1705 .addDef(DefReg)
1706 .addUse(Def1Reg)
1707 .addUse(Def2Reg);
1708 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1709 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1710 }
1711
1712 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1713 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1714
1715 I.eraseFromParent();
1716 return true;
1717 }
Tim Northovere9600d82017-02-08 17:57:27 +00001718 case TargetOpcode::G_VASTART:
1719 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1720 : selectVaStartAAPCS(I, MF, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001721 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1722 if (!I.getOperand(0).isIntrinsicID())
1723 return false;
1724 if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
1725 return false;
1726 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
1727 .addImm(1);
1728 I.eraseFromParent();
1729 return true;
Amara Emerson1e8c1642018-07-31 00:09:02 +00001730 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001731 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001732 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1733 const unsigned DstReg = I.getOperand(0).getReg();
1734 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1735 const TargetRegisterClass *DstRC =
1736 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1737 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001738 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001739 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001740 case TargetOpcode::G_BLOCK_ADDR: {
1741 if (TM.getCodeModel() == CodeModel::Large) {
1742 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1743 I.eraseFromParent();
1744 return true;
1745 } else {
1746 I.setDesc(TII.get(AArch64::MOVaddrBA));
1747 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1748 I.getOperand(0).getReg())
1749 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1750 /* Offset */ 0, AArch64II::MO_PAGE)
1751 .addBlockAddress(
1752 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1753 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1754 I.eraseFromParent();
1755 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1756 }
1757 }
Amara Emerson5ec14602018-12-10 18:44:58 +00001758 case TargetOpcode::G_BUILD_VECTOR:
1759 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001760 case TargetOpcode::G_MERGE_VALUES:
1761 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001762 case TargetOpcode::G_UNMERGE_VALUES:
1763 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001764 case TargetOpcode::G_SHUFFLE_VECTOR:
1765 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001766 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1767 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001768 case TargetOpcode::G_INSERT_VECTOR_ELT:
1769 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00001770 case TargetOpcode::G_CONCAT_VECTORS:
1771 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001772 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001773
1774 return false;
1775}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00001776
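/// Put a scalar of width \p EltSize into the low lane of a register of class
/// \p DstRC by building an IMPLICIT_DEF of the wide class and inserting the
/// scalar with INSERT_SUBREG at the matching hsub/ssub/dsub index. The upper
/// lanes are left undefined.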
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001777MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00001778 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001779 MachineIRBuilder &MIRBuilder) const {
1780 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00001781
1782 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001783 auto Ins =
1784 MIRBuilder
1785 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
1786 .addImm(SubregIndex);
1787 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
1788 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
1789 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00001790 };
1791
Amara Emerson8acb0d92019-03-04 19:16:00 +00001792 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00001793 case 16:
1794 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00001795 case 32:
1796 return BuildFn(AArch64::ssub);
1797 case 64:
1798 return BuildFn(AArch64::dsub);
1799 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001800 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00001801 }
1802}
1803
Amara Emerson8cb186c2018-12-20 01:11:04 +00001804bool AArch64InstructionSelector::selectMergeValues(
1805 MachineInstr &I, MachineRegisterInfo &MRI) const {
1806 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
1807 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1808 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1809 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
1810
1811 // At the moment we only support merging two s32s into an s64.
1812 if (I.getNumOperands() != 3)
1813 return false;
1814 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
1815 return false;
1816 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
1817 if (RB.getID() != AArch64::GPRRegBankID)
1818 return false;
1819
1820 auto *DstRC = &AArch64::GPR64RegClass;
1821 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
1822 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1823 TII.get(TargetOpcode::SUBREG_TO_REG))
1824 .addDef(SubToRegDef)
1825 .addImm(0)
1826 .addUse(I.getOperand(1).getReg())
1827 .addImm(AArch64::sub_32);
1828 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
1829 // Need to anyext the second scalar before we can use bfm
1830 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1831 TII.get(TargetOpcode::SUBREG_TO_REG))
1832 .addDef(SubToRegDef2)
1833 .addImm(0)
1834 .addUse(I.getOperand(2).getReg())
1835 .addImm(AArch64::sub_32);
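  // BFMXri with immr == 32 and imms == 31 inserts the low 32 bits of the
  // second source into bits [63:32] of the first, so the result is
  // (Op2 << 32) | Op1, which is exactly G_MERGE_VALUES' semantics here.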
Amara Emerson8cb186c2018-12-20 01:11:04 +00001836 MachineInstr &BFM =
1837 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00001838 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00001839 .addUse(SubToRegDef)
1840 .addUse(SubToRegDef2)
1841 .addImm(32)
1842 .addImm(31);
1843 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
1844 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
1845 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
1846 I.eraseFromParent();
1847 return true;
1848}
1849
Jessica Paquette607774c2019-03-11 22:18:01 +00001850static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
1851 const unsigned EltSize) {
1852 // Choose a lane copy opcode and subregister based off of the size of the
1853 // vector's elements.
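  // The CPYi* instructions copy a single lane of a 128-bit vector register
  // into a scalar FPR of the matching width.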
1854 switch (EltSize) {
1855 case 16:
1856 CopyOpc = AArch64::CPYi16;
1857 ExtractSubReg = AArch64::hsub;
1858 break;
1859 case 32:
1860 CopyOpc = AArch64::CPYi32;
1861 ExtractSubReg = AArch64::ssub;
1862 break;
1863 case 64:
1864 CopyOpc = AArch64::CPYi64;
1865 ExtractSubReg = AArch64::dsub;
1866 break;
1867 default:
1868 // Unknown size, bail out.
1869 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
1870 return false;
1871 }
1872 return true;
1873}
1874
Jessica Paquettebb1aced2019-03-13 21:19:29 +00001875/// Given a register \p Reg, find the value of a constant defining \p Reg.
1876/// Return true if one could be found, and store it in \p Val. Return false
1877/// otherwise.
1878static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
1879 unsigned &Val) {
1880 // Look at the def of the register.
1881 MachineInstr *Def = MRI.getVRegDef(Reg);
1882 if (!Def)
1883 return false;
1884
1885 // Find the first definition which isn't a copy.
1886 if (Def->isCopy()) {
1887 Reg = Def->getOperand(1).getReg();
1888 auto It = find_if_not(MRI.reg_nodbg_instructions(Reg),
1889 [](const MachineInstr &MI) { return MI.isCopy(); });
1890 if (It == MRI.reg_instr_nodbg_end()) {
1891 LLVM_DEBUG(dbgs() << "Couldn't find non-copy def for register\n");
1892 return false;
1893 }
1894 Def = &*It;
1895 }
1896
1897 // TODO: Handle opcodes other than G_CONSTANT.
1898 if (Def->getOpcode() != TargetOpcode::G_CONSTANT) {
1899 LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
1900 "currently unsupported.\n");
1901 return false;
1902 }
1903
1904 // Return the constant value associated with the operand.
1905 Val = Def->getOperand(1).getCImm()->getLimitedValue();
1906 return true;
1907}
1908
Amara Emersond61b89b2019-03-14 22:48:18 +00001909MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
1910 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
1911 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
1912 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1913 unsigned CopyOpc = 0;
1914 unsigned ExtractSubReg = 0;
1915 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
1916 LLVM_DEBUG(
1917 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
1918 return nullptr;
1919 }
1920
1921 const TargetRegisterClass *DstRC =
1922 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
1923 if (!DstRC) {
1924 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
1925 return nullptr;
1926 }
1927
1928 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
1929 const LLT &VecTy = MRI.getType(VecReg);
1930 const TargetRegisterClass *VecRC =
1931 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
1932 if (!VecRC) {
1933 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
1934 return nullptr;
1935 }
1936
1937  // The register the lane copy will read from (widened to 128 bits below if
      // the source vector is narrower).
1938 unsigned InsertReg = VecReg;
1939 if (!DstReg)
1940 DstReg = MRI.createVirtualRegister(DstRC);
1941 // If the lane index is 0, we just use a subregister COPY.
1942 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00001943 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
1944 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001945 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00001946 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00001947 }
1948
1949 // Lane copies require 128-bit wide registers. If we're dealing with an
1950 // unpacked vector, then we need to move up to that width. Insert an implicit
1951 // def and a subregister insert to get us there.
1952 if (VecTy.getSizeInBits() != 128) {
1953 MachineInstr *ScalarToVector = emitScalarToVector(
1954 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
1955 if (!ScalarToVector)
1956 return nullptr;
1957 InsertReg = ScalarToVector->getOperand(0).getReg();
1958 }
1959
1960 MachineInstr *LaneCopyMI =
1961 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
1962 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
1963
1964 // Make sure that we actually constrain the initial copy.
1965 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
1966 return LaneCopyMI;
1967}
1968
Jessica Paquette607774c2019-03-11 22:18:01 +00001969bool AArch64InstructionSelector::selectExtractElt(
1970 MachineInstr &I, MachineRegisterInfo &MRI) const {
1971 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
1972 "unexpected opcode!");
1973 unsigned DstReg = I.getOperand(0).getReg();
1974 const LLT NarrowTy = MRI.getType(DstReg);
1975 const unsigned SrcReg = I.getOperand(1).getReg();
1976 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001977 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00001978 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
1979 "source register size too small!");
1980 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
1981
1982 // Need the lane index to determine the correct copy opcode.
1983 MachineOperand &LaneIdxOp = I.getOperand(2);
1984 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
1985
1986 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
1987 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
1988 return false;
1989 }
1990
Jessica Paquettebb1aced2019-03-13 21:19:29 +00001991 // Find the index to extract from.
1992 unsigned LaneIdx = 0;
1993 if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
Jessica Paquette607774c2019-03-11 22:18:01 +00001994 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00001995
Jessica Paquette607774c2019-03-11 22:18:01 +00001996 MachineIRBuilder MIRBuilder(I);
1997
Amara Emersond61b89b2019-03-14 22:48:18 +00001998 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1999 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2000 LaneIdx, MIRBuilder);
2001 if (!Extract)
2002 return false;
2003
2004 I.eraseFromParent();
2005 return true;
2006}
2007
2008bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2009 MachineInstr &I, MachineRegisterInfo &MRI) const {
2010 unsigned NumElts = I.getNumOperands() - 1;
2011 unsigned SrcReg = I.getOperand(NumElts).getReg();
2012 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2013 const LLT SrcTy = MRI.getType(SrcReg);
2014
2015 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2016 if (SrcTy.getSizeInBits() > 128) {
2017 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2018 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002019 }
2020
Amara Emersond61b89b2019-03-14 22:48:18 +00002021 MachineIRBuilder MIB(I);
2022
2023 // We implement a split vector operation by treating the sub-vectors as
2024 // scalars and extracting them.
2025 const RegisterBank &DstRB =
2026 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2027 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2028 unsigned Dst = I.getOperand(OpIdx).getReg();
2029 MachineInstr *Extract =
2030 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2031 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002032 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002033 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002034 I.eraseFromParent();
2035 return true;
2036}
2037
Jessica Paquette245047d2019-01-24 22:00:41 +00002038bool AArch64InstructionSelector::selectUnmergeValues(
2039 MachineInstr &I, MachineRegisterInfo &MRI) const {
2040 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2041 "unexpected opcode");
2042
2043 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2044 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2045 AArch64::FPRRegBankID ||
2046 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2047 AArch64::FPRRegBankID) {
2048 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2049 "currently unsupported.\n");
2050 return false;
2051 }
2052
2053 // The last operand is the vector source register, and every other operand is
2054 // a register to unpack into.
2055 unsigned NumElts = I.getNumOperands() - 1;
2056 unsigned SrcReg = I.getOperand(NumElts).getReg();
2057 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2058 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002059 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002060 assert(WideTy.isVector() && "can only unmerge from vector types!");
2061 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2062 "source register size too small!");
2063
Amara Emersond61b89b2019-03-14 22:48:18 +00002064 if (!NarrowTy.isScalar())
2065 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002066
Amara Emerson3739a202019-03-15 21:59:50 +00002067 MachineIRBuilder MIB(I);
2068
Jessica Paquette245047d2019-01-24 22:00:41 +00002069 // Choose a lane copy opcode and subregister based off of the size of the
2070 // vector's elements.
2071 unsigned CopyOpc = 0;
2072 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002073 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002074 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002075
2076 // Set up for the lane copies.
2077 MachineBasicBlock &MBB = *I.getParent();
2078
2079 // Stores the registers we'll be copying from.
2080 SmallVector<unsigned, 4> InsertRegs;
2081
2082 // We'll use the first register twice, so we only need NumElts-1 registers.
2083 unsigned NumInsertRegs = NumElts - 1;
2084
2085 // If our elements fit into exactly 128 bits, then we can copy from the source
2086 // directly. Otherwise, we need to do a bit of setup with some subregister
2087 // inserts.
2088 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2089 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2090 } else {
2091 // No. We have to perform subregister inserts. For each insert, create an
2092 // implicit def and a subregister insert, and save the register we create.
2093 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2094 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2095 MachineInstr &ImpDefMI =
2096 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2097 ImpDefReg);
2098
2099 // Now, create the subregister insert from SrcReg.
2100 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2101 MachineInstr &InsMI =
2102 *BuildMI(MBB, I, I.getDebugLoc(),
2103 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2104 .addUse(ImpDefReg)
2105 .addUse(SrcReg)
2106 .addImm(AArch64::dsub);
2107
2108 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2109 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2110
2111 // Save the register so that we can copy from it after.
2112 InsertRegs.push_back(InsertReg);
2113 }
2114 }
2115
2116 // Now that we've created any necessary subregister inserts, we can
2117 // create the copies.
2118 //
2119 // Perform the first copy separately as a subregister copy.
2120 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002121 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2122 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002123 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002124
2125 // Now, perform the remaining copies as vector lane copies.
2126 unsigned LaneIdx = 1;
2127 for (unsigned InsReg : InsertRegs) {
2128 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2129 MachineInstr &CopyInst =
2130 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2131 .addUse(InsReg)
2132 .addImm(LaneIdx);
2133 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2134 ++LaneIdx;
2135 }
2136
2137 // Separately constrain the first copy's destination. Because of the
2138 // limitation in constrainOperandRegClass, we can't guarantee that this will
2139 // actually be constrained. So, do it ourselves using the second operand.
2140 const TargetRegisterClass *RC =
2141 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2142 if (!RC) {
2143 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2144 return false;
2145 }
2146
2147 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2148 I.eraseFromParent();
2149 return true;
2150}
2151
Amara Emerson2ff22982019-03-14 22:48:15 +00002152bool AArch64InstructionSelector::selectConcatVectors(
2153 MachineInstr &I, MachineRegisterInfo &MRI) const {
2154 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2155 "Unexpected opcode");
2156 unsigned Dst = I.getOperand(0).getReg();
2157 unsigned Op1 = I.getOperand(1).getReg();
2158 unsigned Op2 = I.getOperand(2).getReg();
2159 MachineIRBuilder MIRBuilder(I);
2160 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2161 if (!ConcatMI)
2162 return false;
2163 I.eraseFromParent();
2164 return true;
2165}
2166
Amara Emerson1abe05c2019-02-21 20:20:16 +00002167void AArch64InstructionSelector::collectShuffleMaskIndices(
2168 MachineInstr &I, MachineRegisterInfo &MRI,
2169 SmallVectorImpl<int> &Idxs) const {
2170 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2171 assert(
2172 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2173 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2174 // Find the constant indices.
2175 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2176 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2177 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2178 // Look through copies.
2179 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2180 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2181 assert(ScalarDef && "Could not find def of copy operand");
2182 }
2183 assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT);
2184 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2185 }
2186}
2187
2188unsigned
2189AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2190 MachineFunction &MF) const {
2191 Type *CPTy = CPVal->getType()->getPointerTo();
2192 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2193 if (Align == 0)
2194 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2195
2196 MachineConstantPool *MCP = MF.getConstantPool();
2197 return MCP->getConstantPoolIndex(CPVal, Align);
2198}
2199
2200MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2201 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2202 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2203
2204 auto Adrp =
2205 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2206 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002207
2208 MachineInstr *LoadMI = nullptr;
2209 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2210 case 16:
2211 LoadMI =
2212 &*MIRBuilder
2213 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2214 .addConstantPoolIndex(CPIdx, 0,
2215 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2216 break;
2217 case 8:
2218 LoadMI = &*MIRBuilder
2219 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2220 .addConstantPoolIndex(
2221 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2222 break;
2223 default:
2224 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2225 << *CPVal->getType());
2226 return nullptr;
2227 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002228 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002229 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2230 return LoadMI;
2231}
2232
2233/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2234/// size and RB.
2235static std::pair<unsigned, unsigned>
2236getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2237 unsigned Opc, SubregIdx;
2238 if (RB.getID() == AArch64::GPRRegBankID) {
2239 if (EltSize == 32) {
2240 Opc = AArch64::INSvi32gpr;
2241 SubregIdx = AArch64::ssub;
2242 } else if (EltSize == 64) {
2243 Opc = AArch64::INSvi64gpr;
2244 SubregIdx = AArch64::dsub;
2245 } else {
2246 llvm_unreachable("invalid elt size!");
2247 }
2248 } else {
2249 if (EltSize == 8) {
2250 Opc = AArch64::INSvi8lane;
2251 SubregIdx = AArch64::bsub;
2252 } else if (EltSize == 16) {
2253 Opc = AArch64::INSvi16lane;
2254 SubregIdx = AArch64::hsub;
2255 } else if (EltSize == 32) {
2256 Opc = AArch64::INSvi32lane;
2257 SubregIdx = AArch64::ssub;
2258 } else if (EltSize == 64) {
2259 Opc = AArch64::INSvi64lane;
2260 SubregIdx = AArch64::dsub;
2261 } else {
2262 llvm_unreachable("invalid elt size!");
2263 }
2264 }
2265 return std::make_pair(Opc, SubregIdx);
2266}
2267
2268MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002269 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2270 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002271 // We implement a vector concat by:
2272 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2273 // 2. Insert the upper vector into the destination's upper element
2274 // TODO: some of this code is common with G_BUILD_VECTOR handling.
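  // E.g. for two v2s32 operands: Op1 is widened into the low 64 bits of a
  // 128-bit register, then INSvi64lane moves Op2's 64 bits into the high half,
  // giving a v4s32 result.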
2275 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2276
2277 const LLT Op1Ty = MRI.getType(Op1);
2278 const LLT Op2Ty = MRI.getType(Op2);
2279
2280 if (Op1Ty != Op2Ty) {
2281 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2282 return nullptr;
2283 }
2284 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2285
2286 if (Op1Ty.getSizeInBits() >= 128) {
2287 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2288 return nullptr;
2289 }
2290
2291 // At the moment we just support 64 bit vector concats.
2292 if (Op1Ty.getSizeInBits() != 64) {
2293 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
2294 return nullptr;
2295 }
2296
2297 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2298 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2299 const TargetRegisterClass *DstRC =
2300 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2301
2302 MachineInstr *WidenedOp1 =
2303 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2304 MachineInstr *WidenedOp2 =
2305 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2306 if (!WidenedOp1 || !WidenedOp2) {
2307 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2308 return nullptr;
2309 }
2310
2311 // Now do the insert of the upper element.
2312 unsigned InsertOpc, InsSubRegIdx;
2313 std::tie(InsertOpc, InsSubRegIdx) =
2314 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2315
Amara Emerson2ff22982019-03-14 22:48:15 +00002316 if (!Dst)
2317 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002318 auto InsElt =
2319 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002320 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002321 .addImm(1) /* Lane index */
2322 .addUse(WidenedOp2->getOperand(0).getReg())
2323 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002324 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2325 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002326}
2327
2328bool AArch64InstructionSelector::selectShuffleVector(
2329 MachineInstr &I, MachineRegisterInfo &MRI) const {
2330 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2331 unsigned Src1Reg = I.getOperand(1).getReg();
2332 const LLT Src1Ty = MRI.getType(Src1Reg);
2333 unsigned Src2Reg = I.getOperand(2).getReg();
2334 const LLT Src2Ty = MRI.getType(Src2Reg);
2335
2336 MachineBasicBlock &MBB = *I.getParent();
2337 MachineFunction &MF = *MBB.getParent();
2338 LLVMContext &Ctx = MF.getFunction().getContext();
2339
2340 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2341  // operand; it comes in as a normal vector value which we have to analyze to
2342 // find the mask indices.
2343 SmallVector<int, 8> Mask;
2344 collectShuffleMaskIndices(I, MRI, Mask);
2345 assert(!Mask.empty() && "Expected to find mask indices");
2346
2347 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2348 // it's originated from a <1 x T> type. Those should have been lowered into
2349 // G_BUILD_VECTOR earlier.
2350 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2351 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2352 return false;
2353 }
2354
2355 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2356
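  // Expand the lane mask into a byte index vector for TBL: lane index Val
  // selects the BytesPerElt bytes starting at Val * BytesPerElt in the
  // concatenated sources, e.g. a <4 x s16> mask <0, 4, 1, 5> expands to
  // <0,1, 8,9, 2,3, 10,11>.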
2357 SmallVector<Constant *, 64> CstIdxs;
2358 for (int Val : Mask) {
2359 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2360 unsigned Offset = Byte + Val * BytesPerElt;
2361 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2362 }
2363 }
2364
Amara Emerson8acb0d92019-03-04 19:16:00 +00002365 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002366
2367 // Use a constant pool to load the index vector for TBL.
2368 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002369 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2370 if (!IndexLoad) {
2371 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
2372 return false;
2373 }
2374
Amara Emerson8acb0d92019-03-04 19:16:00 +00002375 if (DstTy.getSizeInBits() != 128) {
2376 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2377 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00002378 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002379 if (!Concat) {
2380 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
2381 return false;
2382 }
2383
2384 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
2385 IndexLoad =
2386 emitScalarToVector(64, &AArch64::FPR128RegClass,
2387 IndexLoad->getOperand(0).getReg(), MIRBuilder);
2388
2389 auto TBL1 = MIRBuilder.buildInstr(
2390 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2391 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2392 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2393
Amara Emerson3739a202019-03-15 21:59:50 +00002394 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00002395 MIRBuilder
2396 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2397 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002398 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2399 I.eraseFromParent();
2400 return true;
2401 }
2402
Amara Emerson1abe05c2019-02-21 20:20:16 +00002403 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2404 // Q registers for regalloc.
2405 auto RegSeq = MIRBuilder
2406 .buildInstr(TargetOpcode::REG_SEQUENCE,
2407 {&AArch64::QQRegClass}, {Src1Reg})
2408 .addImm(AArch64::qsub0)
2409 .addUse(Src2Reg)
2410 .addImm(AArch64::qsub1);
2411
2412 auto TBL2 =
2413 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2414 {RegSeq, IndexLoad->getOperand(0).getReg()});
2415 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2416 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2417 I.eraseFromParent();
2418 return true;
2419}
2420
Jessica Paquette16d67a32019-03-13 23:22:23 +00002421MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2422 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2423 unsigned LaneIdx, const RegisterBank &RB,
2424 MachineIRBuilder &MIRBuilder) const {
2425 MachineInstr *InsElt = nullptr;
2426 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2427 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2428
2429 // Create a register to define with the insert if one wasn't passed in.
2430 if (!DstReg)
2431 DstReg = MRI.createVirtualRegister(DstRC);
2432
2433 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2434 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2435
2436 if (RB.getID() == AArch64::FPRRegBankID) {
2437 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2438 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2439 .addImm(LaneIdx)
2440 .addUse(InsSub->getOperand(0).getReg())
2441 .addImm(0);
2442 } else {
2443 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2444 .addImm(LaneIdx)
2445 .addUse(EltReg);
2446 }
2447
2448 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2449 return InsElt;
2450}
2451
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002452bool AArch64InstructionSelector::selectInsertElt(
2453 MachineInstr &I, MachineRegisterInfo &MRI) const {
2454 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
2455
2456 // Get information on the destination.
2457 unsigned DstReg = I.getOperand(0).getReg();
2458 const LLT DstTy = MRI.getType(DstReg);
2459 if (DstTy.getSizeInBits() < 128) {
2460 // TODO: Handle unpacked vectors.
2461 LLVM_DEBUG(dbgs() << "Unpacked vectors not supported yet!");
2462 return false;
2463 }
2464
2465 // Get information on the element we want to insert into the destination.
2466 unsigned EltReg = I.getOperand(2).getReg();
2467 const LLT EltTy = MRI.getType(EltReg);
2468 unsigned EltSize = EltTy.getSizeInBits();
2469 if (EltSize < 16 || EltSize > 64)
2470 return false; // Don't support all element types yet.
2471
2472 // Find the definition of the index. Bail out if it's not defined by a
2473 // G_CONSTANT.
2474 unsigned IdxReg = I.getOperand(3).getReg();
2475 unsigned LaneIdx = 0;
2476 if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
2477 return false;
2478
2479 // Perform the lane insert.
2480 unsigned SrcReg = I.getOperand(1).getReg();
2481 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
2482 MachineIRBuilder MIRBuilder(I);
2483 emitLaneInsert(DstReg, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
2484 I.eraseFromParent();
2485 return true;
2486}
2487
Amara Emerson5ec14602018-12-10 18:44:58 +00002488bool AArch64InstructionSelector::selectBuildVector(
2489 MachineInstr &I, MachineRegisterInfo &MRI) const {
2490 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
2491 // Until we port more of the optimized selections, for now just use a vector
2492 // insert sequence.
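  // Element 0 is placed into the low lane of an IMPLICIT_DEF vector via
  // INSERT_SUBREG, and each remaining element is inserted with the INSvi*
  // instruction matching its size and register bank.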
2493 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2494 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
2495 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00002496 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00002497 return false; // Don't support all element types yet.
2498 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002499 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00002500
2501 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002502 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00002503 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
2504 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002505 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00002506 return false;
2507
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002508 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002509 unsigned DstSize = DstTy.getSizeInBits();
2510
2511 // Keep track of the last MI we inserted. Later on, we might be able to save
2512 // a copy using it.
2513 MachineInstr *PrevMI = nullptr;
2514 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00002515 // Note that if we don't do a subregister copy, we can end up making an
2516 // extra register.
2517 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
2518 MIRBuilder);
2519 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00002520 }
Jessica Paquette245047d2019-01-24 22:00:41 +00002521
2522 // If DstTy's size in bits is less than 128, then emit a subregister copy
2523 // from DstVec to the last register we've defined.
2524 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00002525 // Force this to be FPR using the destination vector.
2526 const TargetRegisterClass *RC =
2527 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00002528 if (!RC)
2529 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00002530 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2531 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2532 return false;
2533 }
2534
2535 unsigned SubReg = 0;
2536 if (!getSubRegForClass(RC, TRI, SubReg))
2537 return false;
2538 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2539 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
2540                        << ")\n");
2541 return false;
2542 }

    unsigned Reg = MRI.createVirtualRegister(RC);
    unsigned DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// Select an immediate value that can be represented as a 12-bit value shifted
/// left by either 0 or 12. If so, return the renderers that add the 12-bit
/// value and the encoded shift (LSL #0 or LSL #12) as immediate operands;
/// otherwise return None.
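/// For example (illustrative values):
///   0x00000abc -> value 0xabc, LSL #0
///   0x00abc000 -> value 0xabc, LSL #12
///   0x00abc001 -> no match (not encodable as a shifted 12-bit immediate)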
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  MachineInstr &MI = *Root.getParent();
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    MachineInstr *Def = MRI.getVRegDef(Root.getReg());
    if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
      return None;
    MachineOperand &Op1 = Def->getOperand(1);
    if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
      return None;
    Immed = Op1.getCImm()->getZExtValue();
  } else
    return None;

  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
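/// For example (illustrative): a 4-byte access at base + 3 cannot use the
/// scaled form (3 is not a multiple of 4), but 3 fits the signed 9-bit
/// unscaled range [-256, 255], so this pattern matches it instead.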
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
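  // (The scaled form requires a non-negative offset that is a multiple of Size
  // and, once divided by Size, fits in an unsigned 12-bit field, i.e. a byte
  // offset below 4096 * Size.)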
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
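/// For example (illustrative): an 8-byte access at base + 48 is rendered with
/// an immediate of 48 / 8 = 6, since the 12-bit offset field is scaled by the
/// access size.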
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

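/// Render the value of a G_CONSTANT (\p MI) as a plain immediate operand on
/// \p MIB. (This has the shape of a GlobalISel custom operand renderer and is
/// presumably referenced from the imported tablegen patterns.)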
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}
2723
Daniel Sanders0b5293f2017-04-06 09:49:34 +00002724namespace llvm {
2725InstructionSelector *
2726createAArch64InstructionSelector(const AArch64TargetMachine &TM,
2727 AArch64Subtarget &Subtarget,
2728 AArch64RegisterBankInfo &RBI) {
2729 return new AArch64InstructionSelector(TM, Subtarget, RBI);
2730}
2731}