//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<int> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

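  // ComplexPattern renderers. These are handed back to the tblgen-erated
  // selectImpl() so it can match operands for complex addressing modes and
  // shifted immediates.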
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check if a reg def is an MI with a given opcode and
  // returns it if so.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

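/// Select the AArch64 opcode for a scalar G_SITOFP/G_UITOFP/G_FPTOSI/G_FPTOOUI
/// conversion, keyed on the destination and source sizes.
/// \returns \p GenericOpc if the combination is unsupported.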
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

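/// Map an integer-compare predicate onto the equivalent AArch64 condition
/// code.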
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

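/// Map a floating-point-compare predicate onto AArch64 condition codes. Some
/// predicates need two conditions; the second is returned in \p CondCode2
/// (AArch64CC::AL when unused).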
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
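  // CB(N)Z only compares against zero, so canonicalize a constant operand (if
  // any) onto the RHS before checking for it below.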
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

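// Lower va_start for Darwin by storing the address of the variadic-arguments
// stack area (the frame index recorded in AArch64FunctionInfo) into the
// va_list object pointed to by the operand.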
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

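  // Helper that emits a MOVK filling in the next 16-bit chunk of the address
  // (at bit position \p Offset), chained onto \p SrcReg. A non-zero
  // \p ForceDstReg makes the MOVK write that register instead of a new vreg.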
  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

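    // Materialize the constant with a plain MOVi32imm/MOVi64imm. For FP
    // constants the value is first built in a GPR and then copied over to the
    // FPR bank below.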
    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    I.setDesc(TII.get(MovOpc));

    if (isFP) {
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

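    // Select an unsigned bitfield extract (UBFM); the immediate appended below
    // is the index of the last bit of the extracted range (lsb + width - 1).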
Amara Emersonbc03bae2018-02-18 17:03:02 +00001112 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001113 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1114 Ty.getSizeInBits() - 1);
1115
Amara Emersonbc03bae2018-02-18 17:03:02 +00001116 if (SrcSize < 64) {
1117 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1118 "unexpected G_EXTRACT types");
1119 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1120 }
1121
Tim Northover7b6d66c2017-07-20 22:58:38 +00001122 unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001123 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001124 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1125 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001126 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1127 AArch64::GPR32RegClass, MRI);
1128 I.getOperand(0).setReg(DstReg);
1129
1130 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1131 }
1132
1133 case TargetOpcode::G_INSERT: {
1134 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001135 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1136 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001137 // Larger inserts are vectors, same-size ones should be something else by
1138 // now (split up or turned into COPYs).
1139 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1140 return false;
1141
Amara Emersonbc03bae2018-02-18 17:03:02 +00001142 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001143 unsigned LSB = I.getOperand(3).getImm();
1144 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001145 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001146 MachineInstrBuilder(MF, I).addImm(Width - 1);
1147
Amara Emersonbc03bae2018-02-18 17:03:02 +00001148 if (DstSize < 64) {
1149 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1150 "unexpected G_INSERT types");
1151 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1152 }
1153
Tim Northover7b6d66c2017-07-20 22:58:38 +00001154 unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1155 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1156 TII.get(AArch64::SUBREG_TO_REG))
1157 .addDef(SrcReg)
1158 .addImm(0)
1159 .addUse(I.getOperand(2).getReg())
1160 .addImm(AArch64::sub_32);
1161 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1162 AArch64::GPR32RegClass, MRI);
1163 I.getOperand(2).setReg(SrcReg);
1164
1165 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1166 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001167 case TargetOpcode::G_FRAME_INDEX: {
1168 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001169 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001170 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1171 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001172 return false;
1173 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001174 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001175
1176 // MOs for a #0 shifted immediate.
1177 I.addOperand(MachineOperand::CreateImm(0));
1178 I.addOperand(MachineOperand::CreateImm(0));
1179
1180 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1181 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001182
1183 case TargetOpcode::G_GLOBAL_VALUE: {
1184 auto GV = I.getOperand(1).getGlobal();
1185 if (GV->isThreadLocal()) {
1186 // FIXME: we don't support TLS yet.
1187 return false;
1188 }
1189 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001190 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001191 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001192 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001193 } else if (TM.getCodeModel() == CodeModel::Large) {
1194 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001195 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001196 I.eraseFromParent();
1197 return true;
David Green9dd1d452018-08-22 11:31:39 +00001198 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1199 I.setDesc(TII.get(AArch64::ADR));
1200 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001201 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001202 I.setDesc(TII.get(AArch64::MOVaddr));
1203 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1204 MachineInstrBuilder MIB(MF, I);
1205 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1206 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1207 }
1208 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1209 }
1210
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001211 case TargetOpcode::G_LOAD:
1212 case TargetOpcode::G_STORE: {
Tim Northover0f140c72016-09-09 11:46:34 +00001213 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001214
Tim Northover5ae83502016-09-15 09:20:34 +00001215 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001216 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1217 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001218 return false;
1219 }
1220
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001221 auto &MemOp = **I.memoperands_begin();
1222 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001223 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001224 return false;
1225 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001226 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001227
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001228 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001229#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001230 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001231 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001232 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1233 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001234 assert(MRI.getType(PtrReg).isPointer() &&
1235 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001236#endif
1237
1238 const unsigned ValReg = I.getOperand(0).getReg();
1239 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1240
1241 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001242 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001243 if (NewOpc == I.getOpcode())
1244 return false;
1245
1246 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001247
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001248 uint64_t Offset = 0;
1249 auto *PtrMI = MRI.getVRegDef(PtrReg);
1250
1251 // Try to fold a GEP into our unsigned immediate addressing mode.
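    // The *ui load/store forms take a 12-bit unsigned immediate that is
    // implicitly scaled by the access size, so e.g. an 8-byte access at byte
    // offset 32 can be encoded with an immediate of 4. This is a sketch of
    // the constraint checked below.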
1252 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1253 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1254 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001255 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001256 const unsigned Scale = Log2_32(Size);
1257 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1258 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1259 I.getOperand(1).setReg(Ptr2Reg);
1260 PtrMI = MRI.getVRegDef(Ptr2Reg);
1261 Offset = Imm / Size;
1262 }
1263 }
1264 }
1265
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001266 // If we haven't folded anything into our addressing mode yet, try to fold
1267 // a frame index into the base+offset.
1268 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1269 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1270
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001271 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001272
1273 // If we're storing a 0, use WZR/XZR.
1274 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1275 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1276 if (I.getOpcode() == AArch64::STRWui)
1277 I.getOperand(0).setReg(AArch64::WZR);
1278 else if (I.getOpcode() == AArch64::STRXui)
1279 I.getOperand(0).setReg(AArch64::XZR);
1280 }
1281 }
1282
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001283 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1284 }
1285
Tim Northover9dd78f82017-02-08 21:22:25 +00001286 case TargetOpcode::G_SMULH:
1287 case TargetOpcode::G_UMULH: {
1288 // Reject the various things we don't support yet.
1289 if (unsupportedBinOp(I, RBI, MRI, TRI))
1290 return false;
1291
1292 const unsigned DefReg = I.getOperand(0).getReg();
1293 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1294
1295 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001296 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001297 return false;
1298 }
1299
1300 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001301 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1302 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001303 return false;
1304 }
1305
1306 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1307 : AArch64::UMULHrr;
1308 I.setDesc(TII.get(NewOpc));
1309
1310 // Now that we selected an opcode, we need to constrain the register
1311 // operands to use appropriate classes.
1312 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1313 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001314 case TargetOpcode::G_FADD:
1315 case TargetOpcode::G_FSUB:
1316 case TargetOpcode::G_FMUL:
1317 case TargetOpcode::G_FDIV:
1318
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001319 case TargetOpcode::G_OR:
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001320 case TargetOpcode::G_SHL:
1321 case TargetOpcode::G_LSHR:
1322 case TargetOpcode::G_ASHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001323 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001324 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001325 if (unsupportedBinOp(I, RBI, MRI, TRI))
1326 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001327
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001328 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001329
1330 const unsigned DefReg = I.getOperand(0).getReg();
1331 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1332
1333 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1334 if (NewOpc == I.getOpcode())
1335 return false;
1336
1337 I.setDesc(TII.get(NewOpc));
1338    // FIXME: Should the type always be reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001339
1340 // Now that we selected an opcode, we need to constrain the register
1341 // operands to use appropriate classes.
1342 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1343 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001344
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001345 case TargetOpcode::G_UADDO: {
1346 // TODO: Support other types.
1347 unsigned OpSize = Ty.getSizeInBits();
1348 if (OpSize != 32 && OpSize != 64) {
1349 LLVM_DEBUG(
1350 dbgs()
1351           << "G_UADDO currently only supported for 32 and 64 bit types.\n");
1352 return false;
1353 }
1354
1355 // TODO: Support vectors.
1356 if (Ty.isVector()) {
1357 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1358 return false;
1359 }
1360
1361    // Add the two operands, setting the condition flags.
1362 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1363 MachineIRBuilder MIRBuilder(I);
1364 auto AddsMI = MIRBuilder.buildInstr(
1365 AddsOpc, {I.getOperand(0).getReg()},
1366 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1367 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1368
1369 // Now, put the overflow result in the register given by the first operand
1370 // to the G_UADDO. CSINC increments the result when the predicate is false,
1371 // so to get the increment when it's true, we need to use the inverse. In
1372 // this case, we want to increment when carry is set.
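    // Roughly, for "%res, %carry = G_UADDO %a, %b" (s32) the emitted code is:
    //   %res   = ADDSWrr %a, %b        ; sets C on unsigned overflow
    //   %carry = CSINCWr wzr, wzr, lo  ; yields 1 exactly when C is set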
1373 auto CsetMI = MIRBuilder
1374 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1375 {AArch64::WZR, AArch64::WZR})
1376 .addImm(getInvertedCondCode(AArch64CC::HS));
1377 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1378 I.eraseFromParent();
1379 return true;
1380 }
1381
Tim Northover398c5f52017-02-14 20:56:29 +00001382 case TargetOpcode::G_PTR_MASK: {
1383 uint64_t Align = I.getOperand(2).getImm();
1384 if (Align >= 64 || Align == 0)
1385 return false;
1386
1387 uint64_t Mask = ~((1ULL << Align) - 1);
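    // e.g. Align == 4 gives Mask == 0xffff'ffff'ffff'fff0, which can be
    // encoded as an ANDXri logical immediate.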
1388 I.setDesc(TII.get(AArch64::ANDXri));
1389 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1390
1391 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1392 }
Tim Northover037af52c2016-10-31 18:31:09 +00001393 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001394 case TargetOpcode::G_TRUNC: {
1395 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1396 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1397
1398 const unsigned DstReg = I.getOperand(0).getReg();
1399 const unsigned SrcReg = I.getOperand(1).getReg();
1400
1401 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1402 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1403
1404 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001405 LLVM_DEBUG(
1406 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001407 return false;
1408 }
1409
1410 if (DstRB.getID() == AArch64::GPRRegBankID) {
1411 const TargetRegisterClass *DstRC =
1412 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1413 if (!DstRC)
1414 return false;
1415
1416 const TargetRegisterClass *SrcRC =
1417 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1418 if (!SrcRC)
1419 return false;
1420
1421 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1422 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001423 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001424 return false;
1425 }
1426
1427 if (DstRC == SrcRC) {
1428 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001429 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1430 SrcTy == LLT::scalar(64)) {
1431 llvm_unreachable("TableGen can import this case");
1432 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001433 } else if (DstRC == &AArch64::GPR32RegClass &&
1434 SrcRC == &AArch64::GPR64RegClass) {
1435 I.getOperand(1).setSubReg(AArch64::sub_32);
1436 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001437 LLVM_DEBUG(
1438 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001439 return false;
1440 }
1441
1442 I.setDesc(TII.get(TargetOpcode::COPY));
1443 return true;
1444 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1445 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1446 I.setDesc(TII.get(AArch64::XTNv4i16));
1447 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1448 return true;
1449 }
1450 }
1451
1452 return false;
1453 }
1454
Tim Northover3d38b3a2016-10-11 20:50:21 +00001455 case TargetOpcode::G_ANYEXT: {
1456 const unsigned DstReg = I.getOperand(0).getReg();
1457 const unsigned SrcReg = I.getOperand(1).getReg();
1458
Quentin Colombetcb629a82016-10-12 03:57:49 +00001459 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1460 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001461 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1462 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001463 return false;
1464 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001465
Quentin Colombetcb629a82016-10-12 03:57:49 +00001466 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1467 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001468 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1469 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001470 return false;
1471 }
1472
1473 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1474
1475 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001476 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001477 return false;
1478 }
1479
Quentin Colombetcb629a82016-10-12 03:57:49 +00001480 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001481 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1482 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001483 return false;
1484 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001485 // At this point G_ANYEXT is just like a plain COPY, but we need
1486    // to explicitly form the 64-bit value if one is needed.
1487 if (DstSize > 32) {
1488 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1489 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1490 .addDef(ExtSrc)
1491 .addImm(0)
1492 .addUse(SrcReg)
1493 .addImm(AArch64::sub_32);
1494 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001495 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001496 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001497 }
1498
1499 case TargetOpcode::G_ZEXT:
1500 case TargetOpcode::G_SEXT: {
1501 unsigned Opcode = I.getOpcode();
1502 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1503 SrcTy = MRI.getType(I.getOperand(1).getReg());
1504 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1505 const unsigned DefReg = I.getOperand(0).getReg();
1506 const unsigned SrcReg = I.getOperand(1).getReg();
1507 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1508
1509 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001510 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1511 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001512 return false;
1513 }
1514
1515 MachineInstr *ExtI;
1516 if (DstTy == LLT::scalar(64)) {
1517 // FIXME: Can we avoid manually doing this?
1518 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001519 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1520 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001521 return false;
1522 }
1523
1524 const unsigned SrcXReg =
1525 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1526 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1527 .addDef(SrcXReg)
1528 .addImm(0)
1529 .addUse(SrcReg)
1530 .addImm(AArch64::sub_32);
1531
1532 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1533 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1534 .addDef(DefReg)
1535 .addUse(SrcXReg)
1536 .addImm(0)
1537 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001538 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001539 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1540 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1541 .addDef(DefReg)
1542 .addUse(SrcReg)
1543 .addImm(0)
1544 .addImm(SrcTy.getSizeInBits() - 1);
1545 } else {
1546 return false;
1547 }
1548
1549 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1550
1551 I.eraseFromParent();
1552 return true;
1553 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001554
Tim Northover69271c62016-10-12 22:49:11 +00001555 case TargetOpcode::G_SITOFP:
1556 case TargetOpcode::G_UITOFP:
1557 case TargetOpcode::G_FPTOSI:
1558 case TargetOpcode::G_FPTOUI: {
1559 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1560 SrcTy = MRI.getType(I.getOperand(1).getReg());
1561 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1562 if (NewOpc == Opcode)
1563 return false;
1564
1565 I.setDesc(TII.get(NewOpc));
1566 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1567
1568 return true;
1569 }
1570
1571
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001572 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001573 // The importer is currently unable to import pointer types since they
1574 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001575 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001576
Daniel Sandersedd07842017-08-17 09:26:14 +00001577 case TargetOpcode::G_BITCAST:
1578 // Imported SelectionDAG rules can handle every bitcast except those that
1579 // bitcast from a type to the same type. Ideally, these shouldn't occur
1580 // but we might not run an optimizer that deletes them.
1581 if (MRI.getType(I.getOperand(0).getReg()) ==
1582 MRI.getType(I.getOperand(1).getReg()))
1583 return selectCopy(I, TII, MRI, TRI, RBI);
1584 return false;
1585
Tim Northover9ac0eba2016-11-08 00:45:29 +00001586 case TargetOpcode::G_SELECT: {
1587 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001588 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1589 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001590 return false;
1591 }
1592
1593 const unsigned CondReg = I.getOperand(1).getReg();
1594 const unsigned TReg = I.getOperand(2).getReg();
1595 const unsigned FReg = I.getOperand(3).getReg();
1596
1597 unsigned CSelOpc = 0;
1598
1599 if (Ty == LLT::scalar(32)) {
1600 CSelOpc = AArch64::CSELWr;
Kristof Beylse9412b42017-01-19 13:32:14 +00001601 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
Tim Northover9ac0eba2016-11-08 00:45:29 +00001602 CSelOpc = AArch64::CSELXr;
1603 } else {
1604 return false;
1605 }
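    // The emitted sequence is roughly:
    //   ANDSWri wzr, %cond, #0x1   ; test bit 0 of the i1 condition
    //   CSEL    %dst, %t, %f, ne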
1606
1607 MachineInstr &TstMI =
1608 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1609 .addDef(AArch64::WZR)
1610 .addUse(CondReg)
1611 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1612
1613 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1614 .addDef(I.getOperand(0).getReg())
1615 .addUse(TReg)
1616 .addUse(FReg)
1617 .addImm(AArch64CC::NE);
1618
1619 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1620 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1621
1622 I.eraseFromParent();
1623 return true;
1624 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001625 case TargetOpcode::G_ICMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001626 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001627 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1628 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001629 return false;
1630 }
1631
1632 unsigned CmpOpc = 0;
1633 unsigned ZReg = 0;
1634
1635 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1636 if (CmpTy == LLT::scalar(32)) {
1637 CmpOpc = AArch64::SUBSWrr;
1638 ZReg = AArch64::WZR;
1639 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1640 CmpOpc = AArch64::SUBSXrr;
1641 ZReg = AArch64::XZR;
1642 } else {
1643 return false;
1644 }
1645
Kristof Beyls22524402017-01-05 10:16:08 +00001646 // CSINC increments the result by one when the condition code is false.
1647 // Therefore, we have to invert the predicate to get an increment by 1 when
1648 // the predicate is true.
1649 const AArch64CC::CondCode invCC =
1650 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1651 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
Tim Northover6c02ad52016-10-12 22:49:04 +00001652
1653 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1654 .addDef(ZReg)
1655 .addUse(I.getOperand(2).getReg())
1656 .addUse(I.getOperand(3).getReg());
1657
1658 MachineInstr &CSetMI =
1659 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1660 .addDef(I.getOperand(0).getReg())
1661 .addUse(AArch64::WZR)
1662 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001663 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001664
1665 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1666 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1667
1668 I.eraseFromParent();
1669 return true;
1670 }
1671
Tim Northover7dd378d2016-10-12 22:49:07 +00001672 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001673 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001674 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1675 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001676 return false;
1677 }
1678
1679 unsigned CmpOpc = 0;
1680 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1681 if (CmpTy == LLT::scalar(32)) {
1682 CmpOpc = AArch64::FCMPSrr;
1683 } else if (CmpTy == LLT::scalar(64)) {
1684 CmpOpc = AArch64::FCMPDrr;
1685 } else {
1686 return false;
1687 }
1688
1689 // FIXME: regbank
1690
1691 AArch64CC::CondCode CC1, CC2;
1692 changeFCMPPredToAArch64CC(
1693 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
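    // Predicates such as "one" or "ueq" need two condition codes; in that
    // case CC2 != AL and the two CSINC results are ORed together below.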
1694
1695 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1696 .addUse(I.getOperand(2).getReg())
1697 .addUse(I.getOperand(3).getReg());
1698
1699 const unsigned DefReg = I.getOperand(0).getReg();
1700 unsigned Def1Reg = DefReg;
1701 if (CC2 != AArch64CC::AL)
1702 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1703
1704 MachineInstr &CSetMI =
1705 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1706 .addDef(Def1Reg)
1707 .addUse(AArch64::WZR)
1708 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001709 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001710
1711 if (CC2 != AArch64CC::AL) {
1712 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1713 MachineInstr &CSet2MI =
1714 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1715 .addDef(Def2Reg)
1716 .addUse(AArch64::WZR)
1717 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001718 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001719 MachineInstr &OrMI =
1720 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1721 .addDef(DefReg)
1722 .addUse(Def1Reg)
1723 .addUse(Def2Reg);
1724 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1725 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1726 }
1727
1728 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1729 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1730
1731 I.eraseFromParent();
1732 return true;
1733 }
Tim Northovere9600d82017-02-08 17:57:27 +00001734 case TargetOpcode::G_VASTART:
1735 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1736 : selectVaStartAAPCS(I, MF, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001737 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1738 if (!I.getOperand(0).isIntrinsicID())
1739 return false;
1740 if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
1741 return false;
1742 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
1743 .addImm(1);
1744 I.eraseFromParent();
1745 return true;
Amara Emerson1e8c1642018-07-31 00:09:02 +00001746 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001747 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001748 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1749 const unsigned DstReg = I.getOperand(0).getReg();
1750 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1751 const TargetRegisterClass *DstRC =
1752 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1753 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001754 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001755 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001756 case TargetOpcode::G_BLOCK_ADDR: {
1757 if (TM.getCodeModel() == CodeModel::Large) {
1758 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1759 I.eraseFromParent();
1760 return true;
1761 } else {
1762 I.setDesc(TII.get(AArch64::MOVaddrBA));
1763 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1764 I.getOperand(0).getReg())
1765 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1766 /* Offset */ 0, AArch64II::MO_PAGE)
1767 .addBlockAddress(
1768 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1769 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1770 I.eraseFromParent();
1771 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1772 }
1773 }
Amara Emerson5ec14602018-12-10 18:44:58 +00001774 case TargetOpcode::G_BUILD_VECTOR:
1775 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001776 case TargetOpcode::G_MERGE_VALUES:
1777 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001778 case TargetOpcode::G_UNMERGE_VALUES:
1779 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001780 case TargetOpcode::G_SHUFFLE_VECTOR:
1781 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001782 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1783 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001784 case TargetOpcode::G_INSERT_VECTOR_ELT:
1785 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00001786 case TargetOpcode::G_CONCAT_VECTORS:
1787 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001788 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001789
1790 return false;
1791}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00001792
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001793MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00001794 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001795 MachineIRBuilder &MIRBuilder) const {
1796 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00001797
1798 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001799 auto Ins =
1800 MIRBuilder
1801 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
1802 .addImm(SubregIndex);
1803 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
1804 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
1805 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00001806 };
1807
Amara Emerson8acb0d92019-03-04 19:16:00 +00001808 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00001809 case 16:
1810 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00001811 case 32:
1812 return BuildFn(AArch64::ssub);
1813 case 64:
1814 return BuildFn(AArch64::dsub);
1815 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001816 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00001817 }
1818}
1819
Amara Emerson8cb186c2018-12-20 01:11:04 +00001820bool AArch64InstructionSelector::selectMergeValues(
1821 MachineInstr &I, MachineRegisterInfo &MRI) const {
1822 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
1823 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1824 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1825 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
1826
1827 // At the moment we only support merging two s32s into an s64.
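  // Roughly: both s32 sources are placed into 64-bit registers with
  // SUBREG_TO_REG, then a BFM inserts the second source into bits [63:32]
  // of the result.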
1828 if (I.getNumOperands() != 3)
1829 return false;
1830 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
1831 return false;
1832 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
1833 if (RB.getID() != AArch64::GPRRegBankID)
1834 return false;
1835
1836 auto *DstRC = &AArch64::GPR64RegClass;
1837 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
1838 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1839 TII.get(TargetOpcode::SUBREG_TO_REG))
1840 .addDef(SubToRegDef)
1841 .addImm(0)
1842 .addUse(I.getOperand(1).getReg())
1843 .addImm(AArch64::sub_32);
1844 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
1845 // Need to anyext the second scalar before we can use bfm
1846 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1847 TII.get(TargetOpcode::SUBREG_TO_REG))
1848 .addDef(SubToRegDef2)
1849 .addImm(0)
1850 .addUse(I.getOperand(2).getReg())
1851 .addImm(AArch64::sub_32);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001852 MachineInstr &BFM =
1853 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00001854 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00001855 .addUse(SubToRegDef)
1856 .addUse(SubToRegDef2)
1857 .addImm(32)
1858 .addImm(31);
1859 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
1860 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
1861 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
1862 I.eraseFromParent();
1863 return true;
1864}
1865
Jessica Paquette607774c2019-03-11 22:18:01 +00001866static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
1867 const unsigned EltSize) {
1868 // Choose a lane copy opcode and subregister based off of the size of the
1869 // vector's elements.
1870 switch (EltSize) {
1871 case 16:
1872 CopyOpc = AArch64::CPYi16;
1873 ExtractSubReg = AArch64::hsub;
1874 break;
1875 case 32:
1876 CopyOpc = AArch64::CPYi32;
1877 ExtractSubReg = AArch64::ssub;
1878 break;
1879 case 64:
1880 CopyOpc = AArch64::CPYi64;
1881 ExtractSubReg = AArch64::dsub;
1882 break;
1883 default:
1884 // Unknown size, bail out.
1885 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
1886 return false;
1887 }
1888 return true;
1889}
1890
Jessica Paquettebb1aced2019-03-13 21:19:29 +00001891/// Given a register \p Reg, find the value of a constant defining \p Reg.
1892/// Return true if one could be found, and store it in \p Val. Return false
1893/// otherwise.
1894static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
1895 unsigned &Val) {
1896 // Look at the def of the register.
1897 MachineInstr *Def = MRI.getVRegDef(Reg);
1898 if (!Def)
1899 return false;
1900
1901 // Find the first definition which isn't a copy.
1902 if (Def->isCopy()) {
1903 Reg = Def->getOperand(1).getReg();
1904 auto It = find_if_not(MRI.reg_nodbg_instructions(Reg),
1905 [](const MachineInstr &MI) { return MI.isCopy(); });
1906 if (It == MRI.reg_instr_nodbg_end()) {
1907 LLVM_DEBUG(dbgs() << "Couldn't find non-copy def for register\n");
1908 return false;
1909 }
1910 Def = &*It;
1911 }
1912
1913 // TODO: Handle opcodes other than G_CONSTANT.
1914 if (Def->getOpcode() != TargetOpcode::G_CONSTANT) {
1915 LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
1916 "currently unsupported.\n");
1917 return false;
1918 }
1919
1920 // Return the constant value associated with the operand.
1921 Val = Def->getOperand(1).getCImm()->getLimitedValue();
1922 return true;
1923}
1924
Amara Emersond61b89b2019-03-14 22:48:18 +00001925MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
1926 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
1927 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
1928 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1929 unsigned CopyOpc = 0;
1930 unsigned ExtractSubReg = 0;
1931 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
1932 LLVM_DEBUG(
1933 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
1934 return nullptr;
1935 }
1936
1937 const TargetRegisterClass *DstRC =
1938 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
1939 if (!DstRC) {
1940 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
1941 return nullptr;
1942 }
1943
1944 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
1945 const LLT &VecTy = MRI.getType(VecReg);
1946 const TargetRegisterClass *VecRC =
1947 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
1948 if (!VecRC) {
1949 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
1950 return nullptr;
1951 }
1952
1953  // The register we will ultimately copy the lane from (widened below if
  // needed).
1954 unsigned InsertReg = VecReg;
1955 if (!DstReg)
1956 DstReg = MRI.createVirtualRegister(DstRC);
1957 // If the lane index is 0, we just use a subregister COPY.
1958 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00001959 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
1960 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001961 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00001962 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00001963 }
1964
1965 // Lane copies require 128-bit wide registers. If we're dealing with an
1966 // unpacked vector, then we need to move up to that width. Insert an implicit
1967 // def and a subregister insert to get us there.
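  // e.g. when extracting from a <2 x s32> (64-bit) source, the source is
  // first widened to an FPR128 via INSERT_SUBREG before the CPYi32 lane
  // copy (a sketch of the path below).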
1968 if (VecTy.getSizeInBits() != 128) {
1969 MachineInstr *ScalarToVector = emitScalarToVector(
1970 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
1971 if (!ScalarToVector)
1972 return nullptr;
1973 InsertReg = ScalarToVector->getOperand(0).getReg();
1974 }
1975
1976 MachineInstr *LaneCopyMI =
1977 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
1978 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
1979
1980 // Make sure that we actually constrain the initial copy.
1981 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
1982 return LaneCopyMI;
1983}
1984
Jessica Paquette607774c2019-03-11 22:18:01 +00001985bool AArch64InstructionSelector::selectExtractElt(
1986 MachineInstr &I, MachineRegisterInfo &MRI) const {
1987 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
1988 "unexpected opcode!");
1989 unsigned DstReg = I.getOperand(0).getReg();
1990 const LLT NarrowTy = MRI.getType(DstReg);
1991 const unsigned SrcReg = I.getOperand(1).getReg();
1992 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001993 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00001994 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
1995 "source register size too small!");
1996 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
1997
1998 // Need the lane index to determine the correct copy opcode.
1999 MachineOperand &LaneIdxOp = I.getOperand(2);
2000 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2001
2002 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2003 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2004 return false;
2005 }
2006
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002007 // Find the index to extract from.
2008 unsigned LaneIdx = 0;
2009 if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
Jessica Paquette607774c2019-03-11 22:18:01 +00002010 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002011
Jessica Paquette607774c2019-03-11 22:18:01 +00002012 MachineIRBuilder MIRBuilder(I);
2013
Amara Emersond61b89b2019-03-14 22:48:18 +00002014 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2015 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2016 LaneIdx, MIRBuilder);
2017 if (!Extract)
2018 return false;
2019
2020 I.eraseFromParent();
2021 return true;
2022}
2023
2024bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2025 MachineInstr &I, MachineRegisterInfo &MRI) const {
2026 unsigned NumElts = I.getNumOperands() - 1;
2027 unsigned SrcReg = I.getOperand(NumElts).getReg();
2028 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2029 const LLT SrcTy = MRI.getType(SrcReg);
2030
2031 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2032 if (SrcTy.getSizeInBits() > 128) {
2033 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2034 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002035 }
2036
Amara Emersond61b89b2019-03-14 22:48:18 +00002037 MachineIRBuilder MIB(I);
2038
2039 // We implement a split vector operation by treating the sub-vectors as
2040 // scalars and extracting them.
2041 const RegisterBank &DstRB =
2042 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2043 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2044 unsigned Dst = I.getOperand(OpIdx).getReg();
2045 MachineInstr *Extract =
2046 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2047 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002048 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002049 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002050 I.eraseFromParent();
2051 return true;
2052}
2053
Jessica Paquette245047d2019-01-24 22:00:41 +00002054bool AArch64InstructionSelector::selectUnmergeValues(
2055 MachineInstr &I, MachineRegisterInfo &MRI) const {
2056 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2057 "unexpected opcode");
2058
2059 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2060 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2061 AArch64::FPRRegBankID ||
2062 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2063 AArch64::FPRRegBankID) {
2064 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2065 "currently unsupported.\n");
2066 return false;
2067 }
2068
2069 // The last operand is the vector source register, and every other operand is
2070 // a register to unpack into.
2071 unsigned NumElts = I.getNumOperands() - 1;
2072 unsigned SrcReg = I.getOperand(NumElts).getReg();
2073 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2074 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002075 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002076 assert(WideTy.isVector() && "can only unmerge from vector types!");
2077 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2078 "source register size too small!");
2079
Amara Emersond61b89b2019-03-14 22:48:18 +00002080 if (!NarrowTy.isScalar())
2081 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002082
Amara Emerson3739a202019-03-15 21:59:50 +00002083 MachineIRBuilder MIB(I);
2084
Jessica Paquette245047d2019-01-24 22:00:41 +00002085 // Choose a lane copy opcode and subregister based off of the size of the
2086 // vector's elements.
2087 unsigned CopyOpc = 0;
2088 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002089 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002090 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002091
2092 // Set up for the lane copies.
2093 MachineBasicBlock &MBB = *I.getParent();
2094
2095 // Stores the registers we'll be copying from.
2096 SmallVector<unsigned, 4> InsertRegs;
2097
2098 // We'll use the first register twice, so we only need NumElts-1 registers.
2099 unsigned NumInsertRegs = NumElts - 1;
2100
2101  // If the unmerged elements together span exactly 128 bits, we can copy
2102  // lanes straight from the source register. Otherwise, we need to do a bit
2103  // of setup with some subregister inserts.
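  // e.g. unmerging a <2 x s32> (64-bit) source into two s32s takes the
  // subregister-insert path: the source is placed at dsub of an FPR128 so
  // the CPYi32 lane copies have a 128-bit register to read from (sketch).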
2104 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2105 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2106 } else {
2107 // No. We have to perform subregister inserts. For each insert, create an
2108 // implicit def and a subregister insert, and save the register we create.
2109 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2110 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2111 MachineInstr &ImpDefMI =
2112 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2113 ImpDefReg);
2114
2115 // Now, create the subregister insert from SrcReg.
2116 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2117 MachineInstr &InsMI =
2118 *BuildMI(MBB, I, I.getDebugLoc(),
2119 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2120 .addUse(ImpDefReg)
2121 .addUse(SrcReg)
2122 .addImm(AArch64::dsub);
2123
2124 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2125 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2126
2127 // Save the register so that we can copy from it after.
2128 InsertRegs.push_back(InsertReg);
2129 }
2130 }
2131
2132 // Now that we've created any necessary subregister inserts, we can
2133 // create the copies.
2134 //
2135 // Perform the first copy separately as a subregister copy.
2136 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002137 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2138 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002139 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002140
2141 // Now, perform the remaining copies as vector lane copies.
2142 unsigned LaneIdx = 1;
2143 for (unsigned InsReg : InsertRegs) {
2144 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2145 MachineInstr &CopyInst =
2146 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2147 .addUse(InsReg)
2148 .addImm(LaneIdx);
2149 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2150 ++LaneIdx;
2151 }
2152
2153 // Separately constrain the first copy's destination. Because of the
2154 // limitation in constrainOperandRegClass, we can't guarantee that this will
2155 // actually be constrained. So, do it ourselves using the second operand.
2156 const TargetRegisterClass *RC =
2157 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2158 if (!RC) {
2159 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2160 return false;
2161 }
2162
2163 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2164 I.eraseFromParent();
2165 return true;
2166}
2167
Amara Emerson2ff22982019-03-14 22:48:15 +00002168bool AArch64InstructionSelector::selectConcatVectors(
2169 MachineInstr &I, MachineRegisterInfo &MRI) const {
2170 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2171 "Unexpected opcode");
2172 unsigned Dst = I.getOperand(0).getReg();
2173 unsigned Op1 = I.getOperand(1).getReg();
2174 unsigned Op2 = I.getOperand(2).getReg();
2175 MachineIRBuilder MIRBuilder(I);
2176 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2177 if (!ConcatMI)
2178 return false;
2179 I.eraseFromParent();
2180 return true;
2181}
2182
Amara Emerson1abe05c2019-02-21 20:20:16 +00002183void AArch64InstructionSelector::collectShuffleMaskIndices(
2184 MachineInstr &I, MachineRegisterInfo &MRI,
2185 SmallVectorImpl<int> &Idxs) const {
2186 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2187 assert(
2188 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2189 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2190 // Find the constant indices.
2191 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2192 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2193 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2194 // Look through copies.
2195 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2196 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2197 assert(ScalarDef && "Could not find def of copy operand");
2198 }
2199 assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT);
2200 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2201 }
2202}
2203
2204unsigned
2205AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2206 MachineFunction &MF) const {
2207  Type *CPTy = CPVal->getType();
2208 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2209 if (Align == 0)
2210 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2211
2212 MachineConstantPool *MCP = MF.getConstantPool();
2213 return MCP->getConstantPoolIndex(CPVal, Align);
2214}
2215
2216MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2217 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2218 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2219
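  // The entry's address is formed as an ADRP of its 4 KiB page, with the low
  // 12 bits (MO_PAGEOFF | MO_NC) folded into the load's immediate offset.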
2220 auto Adrp =
2221 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2222 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002223
2224 MachineInstr *LoadMI = nullptr;
2225 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2226 case 16:
2227 LoadMI =
2228 &*MIRBuilder
2229 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2230 .addConstantPoolIndex(CPIdx, 0,
2231 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2232 break;
2233 case 8:
2234 LoadMI = &*MIRBuilder
2235 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2236 .addConstantPoolIndex(
2237 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2238 break;
2239 default:
2240 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2241 << *CPVal->getType());
2242 return nullptr;
2243 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002244 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002245 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2246 return LoadMI;
2247}
2248
2249/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2250/// size and RB.
2251static std::pair<unsigned, unsigned>
2252getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2253 unsigned Opc, SubregIdx;
2254 if (RB.getID() == AArch64::GPRRegBankID) {
2255 if (EltSize == 32) {
2256 Opc = AArch64::INSvi32gpr;
2257 SubregIdx = AArch64::ssub;
2258 } else if (EltSize == 64) {
2259 Opc = AArch64::INSvi64gpr;
2260 SubregIdx = AArch64::dsub;
2261 } else {
2262 llvm_unreachable("invalid elt size!");
2263 }
2264 } else {
2265 if (EltSize == 8) {
2266 Opc = AArch64::INSvi8lane;
2267 SubregIdx = AArch64::bsub;
2268 } else if (EltSize == 16) {
2269 Opc = AArch64::INSvi16lane;
2270 SubregIdx = AArch64::hsub;
2271 } else if (EltSize == 32) {
2272 Opc = AArch64::INSvi32lane;
2273 SubregIdx = AArch64::ssub;
2274 } else if (EltSize == 64) {
2275 Opc = AArch64::INSvi64lane;
2276 SubregIdx = AArch64::dsub;
2277 } else {
2278 llvm_unreachable("invalid elt size!");
2279 }
2280 }
2281 return std::make_pair(Opc, SubregIdx);
2282}
2283
2284MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002285 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2286 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002287 // We implement a vector concat by:
2288 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2289 // 2. Insert the upper vector into the destination's upper element
2290 // TODO: some of this code is common with G_BUILD_VECTOR handling.
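  // e.g. for two 64-bit vector operands: Op1 ends up in lane 0 of a 128-bit
  // register via an INSERT_SUBREG at dsub, and Op2 is then inserted into
  // lane 1 with INSvi64lane (a sketch of the code below).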
2291 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2292
2293 const LLT Op1Ty = MRI.getType(Op1);
2294 const LLT Op2Ty = MRI.getType(Op2);
2295
2296 if (Op1Ty != Op2Ty) {
2297 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2298 return nullptr;
2299 }
2300 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2301
2302 if (Op1Ty.getSizeInBits() >= 128) {
2303 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2304 return nullptr;
2305 }
2306
2307 // At the moment we just support 64 bit vector concats.
2308 if (Op1Ty.getSizeInBits() != 64) {
2309    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2310 return nullptr;
2311 }
2312
2313 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2314 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2315 const TargetRegisterClass *DstRC =
2316 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2317
2318 MachineInstr *WidenedOp1 =
2319 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2320 MachineInstr *WidenedOp2 =
2321 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2322 if (!WidenedOp1 || !WidenedOp2) {
2323 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2324 return nullptr;
2325 }
2326
2327 // Now do the insert of the upper element.
2328 unsigned InsertOpc, InsSubRegIdx;
2329 std::tie(InsertOpc, InsSubRegIdx) =
2330 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2331
Amara Emerson2ff22982019-03-14 22:48:15 +00002332 if (!Dst)
2333 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002334 auto InsElt =
2335 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002336 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002337 .addImm(1) /* Lane index */
2338 .addUse(WidenedOp2->getOperand(0).getReg())
2339 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002340 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2341 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002342}
2343
Amara Emerson761ca2e2019-03-19 21:43:05 +00002344bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2345 // Try to match a vector splat operation into a dup instruction.
2346 // We're looking for this pattern:
2347 // %scalar:gpr(s64) = COPY $x0
2348 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2349 // %cst0:gpr(s32) = G_CONSTANT i32 0
2350 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2351 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2352 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2353 // %zerovec(<2 x s32>)
2354 //
2355 // ...into:
2356 // %splat = DUP %scalar
2357 // We use the regbank of the scalar to determine which kind of dup to use.
2358 MachineIRBuilder MIB(I);
2359 MachineRegisterInfo &MRI = *MIB.getMRI();
2360 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2361 using namespace TargetOpcode;
2362 using namespace MIPatternMatch;
2363
2364 // Begin matching the insert.
2365 auto *InsMI =
2366 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2367 if (!InsMI)
2368 return false;
2369 // Match the undef vector operand.
2370 auto *UndefMI =
2371 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2372 if (!UndefMI)
2373 return false;
2374 // Match the scalar being splatted.
2375 unsigned ScalarReg = InsMI->getOperand(2).getReg();
2376 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2377 // Match the index constant 0.
2378 int64_t Index = 0;
2379 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2380 return false;
2381
2382 // The shuffle's second operand doesn't matter if the mask is all zero.
2383 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2384 if (!ZeroVec)
2385 return false;
2386 int64_t Zero = 0;
2387 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2388 return false;
2389 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2390 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2391 return false; // This wasn't an all zeros vector.
2392 }
2393
2394 // We're done, now find out what kind of splat we need.
2395 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2396 LLT EltTy = VecTy.getElementType();
2397 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2398 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
2399 return false;
2400 }
2401 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2402 static const unsigned OpcTable[2][2] = {
2403 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2404 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2405 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2406
2407 // For FP splats, we need to widen the scalar reg via undef too.
2408 if (IsFP) {
2409 MachineInstr *Widen = emitScalarToVector(
2410 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2411 if (!Widen)
2412 return false;
2413 ScalarReg = Widen->getOperand(0).getReg();
2414 }
2415 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
2416 if (IsFP)
2417 Dup.addImm(0);
2418 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
2419 I.eraseFromParent();
2420 return true;
2421}
2422
2423bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
2424 if (TM.getOptLevel() == CodeGenOpt::None)
2425 return false;
2426 if (tryOptVectorDup(I))
2427 return true;
2428 return false;
2429}
2430
Amara Emerson1abe05c2019-02-21 20:20:16 +00002431bool AArch64InstructionSelector::selectShuffleVector(
2432 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00002433 if (tryOptVectorShuffle(I))
2434 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002435 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2436 unsigned Src1Reg = I.getOperand(1).getReg();
2437 const LLT Src1Ty = MRI.getType(Src1Reg);
2438 unsigned Src2Reg = I.getOperand(2).getReg();
2439 const LLT Src2Ty = MRI.getType(Src2Reg);
2440
2441 MachineBasicBlock &MBB = *I.getParent();
2442 MachineFunction &MF = *MBB.getParent();
2443 LLVMContext &Ctx = MF.getFunction().getContext();
2444
2445 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2446 // operand, it comes in as a normal vector value which we have to analyze to
2447 // find the mask indices.
2448 SmallVector<int, 8> Mask;
2449 collectShuffleMaskIndices(I, MRI, Mask);
2450 assert(!Mask.empty() && "Expected to find mask indices");
2451
2452 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2453  // they originated from a <1 x T> type. Those should have been lowered into
2454 // G_BUILD_VECTOR earlier.
2455 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2456 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2457 return false;
2458 }
2459
2460 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2461
2462 SmallVector<Constant *, 64> CstIdxs;
2463 for (int Val : Mask) {
2464 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2465 unsigned Offset = Byte + Val * BytesPerElt;
2466 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2467 }
2468 }
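  // e.g. in a <4 x s32> shuffle, mask element 1 expands to the byte indices
  // 4, 5, 6, 7; each mask element contributes BytesPerElt consecutive byte
  // offsets to the TBL index vector.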
2469
Amara Emerson8acb0d92019-03-04 19:16:00 +00002470 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002471
2472 // Use a constant pool to load the index vector for TBL.
2473 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002474 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2475 if (!IndexLoad) {
2476 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
2477 return false;
2478 }
2479
Amara Emerson8acb0d92019-03-04 19:16:00 +00002480 if (DstTy.getSizeInBits() != 128) {
2481 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2482 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00002483 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002484 if (!Concat) {
2485 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
2486 return false;
2487 }
2488
2489  // The constant pool load will be 64 bits, so we need to convert it to an
  // FPR128 reg.
2490 IndexLoad =
2491 emitScalarToVector(64, &AArch64::FPR128RegClass,
2492 IndexLoad->getOperand(0).getReg(), MIRBuilder);
2493
2494 auto TBL1 = MIRBuilder.buildInstr(
2495 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2496 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2497 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2498
Amara Emerson3739a202019-03-15 21:59:50 +00002499 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00002500 MIRBuilder
2501 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2502 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002503 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2504 I.eraseFromParent();
2505 return true;
2506 }
2507
Amara Emerson1abe05c2019-02-21 20:20:16 +00002508 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2509 // Q registers for regalloc.
2510 auto RegSeq = MIRBuilder
2511 .buildInstr(TargetOpcode::REG_SEQUENCE,
2512 {&AArch64::QQRegClass}, {Src1Reg})
2513 .addImm(AArch64::qsub0)
2514 .addUse(Src2Reg)
2515 .addImm(AArch64::qsub1);
2516
2517 auto TBL2 =
2518 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2519 {RegSeq, IndexLoad->getOperand(0).getReg()});
2520 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2521 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2522 I.eraseFromParent();
2523 return true;
2524}
2525
Jessica Paquette16d67a32019-03-13 23:22:23 +00002526MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2527 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2528 unsigned LaneIdx, const RegisterBank &RB,
2529 MachineIRBuilder &MIRBuilder) const {
2530 MachineInstr *InsElt = nullptr;
2531 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2532 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2533
2534 // Create a register to define with the insert if one wasn't passed in.
2535 if (!DstReg)
2536 DstReg = MRI.createVirtualRegister(DstRC);
2537
2538 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2539 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2540
2541 if (RB.getID() == AArch64::FPRRegBankID) {
2542 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2543 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2544 .addImm(LaneIdx)
2545 .addUse(InsSub->getOperand(0).getReg())
2546 .addImm(0);
2547 } else {
2548 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2549 .addImm(LaneIdx)
2550 .addUse(EltReg);
2551 }
2552
2553 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2554 return InsElt;
2555}
2556
bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  unsigned EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  unsigned IdxReg = I.getOperand(3).getReg();
  unsigned LaneIdx = 0;
  if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
    return false;

  // Perform the lane insert.
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    unsigned DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << ")\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

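/// Select a G_BUILD_VECTOR as a chain of lane inserts. As a rough sketch
/// (virtual register names are illustrative, and 32-bit FPR-bank elements are
/// assumed so that the insert opcode is INSvi32lane), a <4 x s32> build
/// becomes:
///   %v0:fpr128 = ...                            ; emitScalarToVector of elt 0
///   %v1:fpr128 = INSvi32lane %v0, 1, %e1vec, 0
///   %v2:fpr128 = INSvi32lane %v1, 2, %e2vec, 0
///   %v3:fpr128 = INSvi32lane %v2, 3, %e3vec, 0
/// For destinations narrower than 128 bits, a final ssub/dsub subregister
/// copy narrows the result back down.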
bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIRBuilder);
  if (!ScalarToVec)
    return false;

  unsigned DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIRBuilder);
    DstVec = PrevMI->getOperand(0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec into the destination register.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
      return false;
    }

    unsigned Reg = MRI.createVirtualRegister(RC);
    unsigned DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// selectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return the two
/// operand renderers (the 12-bit value and the shifter operand); otherwise
/// return None.
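/// For example, 0xabc is selected as (0xabc, LSL #0) and 0xabc000 as
/// (0xabc, LSL #12), while 0xabc001 cannot be encoded and is rejected.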
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  MachineInstr &MI = *Root.getParent();
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    MachineInstr *Def = MRI.getVRegDef(Root.getReg());
    if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
      return None;
    MachineOperand &Op1 = Def->getOperand(1);
    if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
      return None;
    Immed = Op1.getCImm()->getZExtValue();
  } else
    return None;

  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
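/// For example, an offset of 3 on a 4-byte access is not a multiple of the
/// access size, so it cannot use the scaled form; it is matched here instead,
/// since any offset in [-256, 255] fits the unscaled encoding.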
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;
  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
      RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
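/// For example, with Size == 4 an offset of 16 is encoded as the scaled
/// immediate 4; valid offsets are non-negative multiples of Size smaller
/// than Size * 4096.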
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable: returning None
  // here lets the unscaled addressing mode match instead.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

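/// Render a G_CONSTANT's value as a plain immediate operand. This is a custom
/// operand renderer; it is presumably referenced by the imported TableGen
/// patterns (the binding itself lives in the .td files, which are not part of
/// this file).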
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
}