//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(unsigned DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check if a reg def is an MI with a given opcode and
  // returns it if so.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  bool tryOptCMN(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
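  // 32-bit values can live either in a GPR32 (subregister index sub_32 of a
  // GPR64) or in an FPR32 (index ssub of a larger FP register), so the
  // register class decides which subregister index to report.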
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

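/// Select a COPY (or copy-like operation such as a bitcast). Register classes
/// for the source and destination are chosen from their register banks and
/// sizes; cross-bank copies of differing sizes are lowered with a subregister
/// copy or a SUBREG_TO_REG.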
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

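/// Pick the scalar [SU]CVTF / FCVTZ[SU] opcode for an int-to-FP or FP-to-int
/// conversion, keyed on the source and destination sizes (32 or 64 bits).
/// Returns \p GenericOpc for any combination not handled here.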
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

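/// Pick between CSEL (GPR bank) and FCSEL (FPR bank) for a G_SELECT, based on
/// the destination's register bank and type. Returns 0 for unsupported types.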
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
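  // Row 0 holds the register-register forms and row 1 the compare-against-zero
  // immediate forms; the column picks the 32-bit (S) or 64-bit (D) variant.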
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

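/// Map an IR floating-point predicate to the AArch64 condition code(s) needed
/// to implement it. Predicates that cannot be expressed with a single
/// condition (e.g. FCMP_ONE, FCMP_UEQ) also set \p CondCode2; otherwise
/// \p CondCode2 is left as AL to mean "unused".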
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

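/// Try to fold a G_BRCOND whose condition comes from a G_ICMP against zero
/// into a single CBZ/CBNZ. Only integer equality/inequality compares of GPR
/// values against the constant 0 are handled; anything else falls back to the
/// generic G_BRCOND lowering.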
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
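  // The constant may be on either side of the compare; canonicalize it into
  // RHS so the zero check below only needs to look at one operand.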
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction, but the
  // shift-left-by-register instruction accepts a signed shift amount, and a
  // negative amount performs a right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

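  // va_start on Darwin stores the address of the first variadic stack slot
  // into the va_list: materialize that address with ADDXri off the vararg
  // frame index, then store it through the list pointer with STRXui.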
  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

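/// Materialize a large code-model GlobalValue or BlockAddress 16 bits at a
/// time: a MOVZ for bits 0-15 followed by MOVKs for bits 16-31, 32-47 and
/// 48-63, with the final MOVK writing the instruction's original destination
/// register.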
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

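/// Entry point for selecting a single instruction. PHIs, copies and other
/// non-generic opcodes are handled directly; everything else is first offered
/// to the TableGen-erated selectImpl() and then to the manual per-opcode
/// handling in the switch below.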
bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    unsigned DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s8 << ", " << s16
                          << ", or " << p0 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001240 return false;
1241 }
1242
1243 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001244 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1245 << " constant on bank: " << RB
1246 << ", expected: GPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001247 return false;
1248 }
1249 }
1250
Amara Emerson8f25a022019-06-21 16:43:50 +00001251 // We allow G_CONSTANT of types < 32b.
Tim Northover4494d692016-10-18 19:47:57 +00001252 const unsigned MovOpc =
Amara Emerson8f25a022019-06-21 16:43:50 +00001253 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
Tim Northover4494d692016-10-18 19:47:57 +00001254
Tim Northover4494d692016-10-18 19:47:57 +00001255 if (isFP) {
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001256 // Either emit a FMOV, or emit a copy to emit a normal mov.
Tim Northover4494d692016-10-18 19:47:57 +00001257 const TargetRegisterClass &GPRRC =
1258 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1259 const TargetRegisterClass &FPRRC =
1260 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1261
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001262 // Can we use a FMOV instruction to represent the immediate?
1263 if (emitFMovForFConstant(I, MRI))
1264 return true;
1265
1266 // Nope. Emit a copy and use a normal mov instead.
Tim Northover4494d692016-10-18 19:47:57 +00001267 const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1268 MachineOperand &RegOp = I.getOperand(0);
1269 RegOp.setReg(DefGPRReg);
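      // The mov materializes the constant into the GPR; emit a copy right after
      // it to move the value into the original FPR definition.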
Amara Emerson3739a202019-03-15 21:59:50 +00001270 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1271 MIB.buildCopy({DefReg}, {DefGPRReg});
Tim Northover4494d692016-10-18 19:47:57 +00001272
1273 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001274 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
Tim Northover4494d692016-10-18 19:47:57 +00001275 return false;
1276 }
1277
1278 MachineOperand &ImmOp = I.getOperand(1);
1279 // FIXME: Is going through int64_t always correct?
1280 ImmOp.ChangeToImmediate(
1281 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001282 } else if (I.getOperand(1).isCImm()) {
Tim Northover9267ac52016-12-05 21:47:07 +00001283 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1284 I.getOperand(1).ChangeToImmediate(Val);
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001285 } else if (I.getOperand(1).isImm()) {
1286 uint64_t Val = I.getOperand(1).getImm();
1287 I.getOperand(1).ChangeToImmediate(Val);
Tim Northover4494d692016-10-18 19:47:57 +00001288 }
1289
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001290 I.setDesc(TII.get(MovOpc));
Tim Northover4494d692016-10-18 19:47:57 +00001291 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1292 return true;
Tim Northover4edc60d2016-10-10 21:49:42 +00001293 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001294 case TargetOpcode::G_EXTRACT: {
1295 LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001296 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Amara Emerson242efdb2018-02-18 17:28:34 +00001297 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001298 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001299    // Larger extracts are vectors; same-size extracts should be something else
1300 // by now (either split up or simplified to a COPY).
1301 if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1302 return false;
1303
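    // Lower the extract to an unsigned bitfield move: the existing immediate is
    // the low bit, and lsb + width - 1 is appended below as the high bit.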
Amara Emersonbc03bae2018-02-18 17:03:02 +00001304 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001305 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1306 Ty.getSizeInBits() - 1);
1307
Amara Emersonbc03bae2018-02-18 17:03:02 +00001308 if (SrcSize < 64) {
1309 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1310 "unexpected G_EXTRACT types");
1311 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1312 }
1313
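    // UBFMXri defines a full 64-bit register; extract into a fresh vreg and
    // copy its low 32 bits into the original, narrower destination.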
Tim Northover7b6d66c2017-07-20 22:58:38 +00001314 unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001315 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001316 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1317 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001318 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1319 AArch64::GPR32RegClass, MRI);
1320 I.getOperand(0).setReg(DstReg);
1321
1322 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1323 }
1324
1325 case TargetOpcode::G_INSERT: {
1326 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001327 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1328 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001329    // Larger inserts are vectors; same-size ones should be something else by
1330 // now (split up or turned into COPYs).
1331 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1332 return false;
1333
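    // Lower the insert to a bitfield move: an immr of (DstSize - LSB) % DstSize
    // places the field at LSB, and an imms of Width - 1 encodes its width.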
Amara Emersonbc03bae2018-02-18 17:03:02 +00001334 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001335 unsigned LSB = I.getOperand(3).getImm();
1336 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001337 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001338 MachineInstrBuilder(MF, I).addImm(Width - 1);
1339
Amara Emersonbc03bae2018-02-18 17:03:02 +00001340 if (DstSize < 64) {
1341 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1342 "unexpected G_INSERT types");
1343 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1344 }
1345
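    // BFMXri needs a 64-bit source; widen the narrow source register with a
    // SUBREG_TO_REG first.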
Tim Northover7b6d66c2017-07-20 22:58:38 +00001346 unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1347 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1348 TII.get(AArch64::SUBREG_TO_REG))
1349 .addDef(SrcReg)
1350 .addImm(0)
1351 .addUse(I.getOperand(2).getReg())
1352 .addImm(AArch64::sub_32);
1353 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1354 AArch64::GPR32RegClass, MRI);
1355 I.getOperand(2).setReg(SrcReg);
1356
1357 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1358 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001359 case TargetOpcode::G_FRAME_INDEX: {
1360 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001361 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001362 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1363 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001364 return false;
1365 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001366 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001367
1368 // MOs for a #0 shifted immediate.
1369 I.addOperand(MachineOperand::CreateImm(0));
1370 I.addOperand(MachineOperand::CreateImm(0));
1371
1372 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1373 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001374
1375 case TargetOpcode::G_GLOBAL_VALUE: {
1376 auto GV = I.getOperand(1).getGlobal();
1377 if (GV->isThreadLocal()) {
1378 // FIXME: we don't support TLS yet.
1379 return false;
1380 }
1381 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001382 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001383 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001384 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001385 } else if (TM.getCodeModel() == CodeModel::Large) {
1386 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001387 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001388 I.eraseFromParent();
1389 return true;
David Green9dd1d452018-08-22 11:31:39 +00001390 } else if (TM.getCodeModel() == CodeModel::Tiny) {
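      // For the tiny code model a single PC-relative ADR (+/-1MB range) is
      // enough to materialize the address.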
1391 I.setDesc(TII.get(AArch64::ADR));
1392 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001393 } else {
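      // Otherwise use MOVaddr, which is later expanded into an ADRP plus an ADD
      // of the low 12 bits.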
Tim Northoverbdf16242016-10-10 21:50:00 +00001394 I.setDesc(TII.get(AArch64::MOVaddr));
1395 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1396 MachineInstrBuilder MIB(MF, I);
1397 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1398 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1399 }
1400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1401 }
1402
Amara Emersond3144a42019-06-06 07:58:37 +00001403 case TargetOpcode::G_ZEXTLOAD:
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001404 case TargetOpcode::G_LOAD:
1405 case TargetOpcode::G_STORE: {
Amara Emersond3144a42019-06-06 07:58:37 +00001406 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1407 MachineIRBuilder MIB(I);
1408
Tim Northover0f140c72016-09-09 11:46:34 +00001409 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001410
Tim Northover5ae83502016-09-15 09:20:34 +00001411 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001412 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1413 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001414 return false;
1415 }
1416
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001417 auto &MemOp = **I.memoperands_begin();
1418 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001419 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001420 return false;
1421 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001422 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001423
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001424 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001425#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001426 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001427 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001428 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1429 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001430 assert(MRI.getType(PtrReg).isPointer() &&
1431 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001432#endif
1433
1434 const unsigned ValReg = I.getOperand(0).getReg();
1435 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1436
1437 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001438 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001439 if (NewOpc == I.getOpcode())
1440 return false;
1441
1442 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001443
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001444 uint64_t Offset = 0;
1445 auto *PtrMI = MRI.getVRegDef(PtrReg);
1446
1447 // Try to fold a GEP into our unsigned immediate addressing mode.
1448 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1449 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1450 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001451 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001452 const unsigned Scale = Log2_32(Size);
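        // The scaled unsigned-immediate form needs a non-negative offset that is
        // a multiple of the access size and fits in 12 bits after scaling.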
1453 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1454 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1455 I.getOperand(1).setReg(Ptr2Reg);
1456 PtrMI = MRI.getVRegDef(Ptr2Reg);
1457 Offset = Imm / Size;
1458 }
1459 }
1460 }
1461
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001462 // If we haven't folded anything into our addressing mode yet, try to fold
1463 // a frame index into the base+offset.
1464 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1465 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1466
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001467 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001468
1469 // If we're storing a 0, use WZR/XZR.
1470 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1471 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1472 if (I.getOpcode() == AArch64::STRWui)
1473 I.getOperand(0).setReg(AArch64::WZR);
1474 else if (I.getOpcode() == AArch64::STRXui)
1475 I.getOperand(0).setReg(AArch64::XZR);
1476 }
1477 }
1478
Amara Emersond3144a42019-06-06 07:58:37 +00001479 if (IsZExtLoad) {
 1480       // Zextloads into a result narrower than 64 bits should be handled by the imported patterns; here we only select the 64-bit case.
1481 if (MRI.getType(ValReg).getSizeInBits() != 64)
1482 return false;
 1483       // If we have a ZEXTLOAD then change the load's type to a narrower register
 1484       // and zero-extend the result with SUBREG_TO_REG.
1485 unsigned LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1486 unsigned DstReg = I.getOperand(0).getReg();
1487 I.getOperand(0).setReg(LdReg);
1488
1489 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1490 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1491 .addImm(0)
1492 .addUse(LdReg)
1493 .addImm(AArch64::sub_32);
1494 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1495 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1496 MRI);
1497 }
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001498 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1499 }
1500
Tim Northover9dd78f82017-02-08 21:22:25 +00001501 case TargetOpcode::G_SMULH:
1502 case TargetOpcode::G_UMULH: {
1503 // Reject the various things we don't support yet.
1504 if (unsupportedBinOp(I, RBI, MRI, TRI))
1505 return false;
1506
1507 const unsigned DefReg = I.getOperand(0).getReg();
1508 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1509
1510 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001511 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001512 return false;
1513 }
1514
1515 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001516 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1517 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001518 return false;
1519 }
1520
1521 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1522 : AArch64::UMULHrr;
1523 I.setDesc(TII.get(NewOpc));
1524
1525 // Now that we selected an opcode, we need to constrain the register
1526 // operands to use appropriate classes.
1527 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1528 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001529 case TargetOpcode::G_FADD:
1530 case TargetOpcode::G_FSUB:
1531 case TargetOpcode::G_FMUL:
1532 case TargetOpcode::G_FDIV:
1533
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001534 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001535 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1536 return selectVectorASHR(I, MRI);
1537 LLVM_FALLTHROUGH;
1538 case TargetOpcode::G_SHL:
1539 if (Opcode == TargetOpcode::G_SHL &&
1540 MRI.getType(I.getOperand(0).getReg()).isVector())
1541 return selectVectorSHL(I, MRI);
1542 LLVM_FALLTHROUGH;
1543 case TargetOpcode::G_OR:
1544 case TargetOpcode::G_LSHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001545 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001546 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001547 if (unsupportedBinOp(I, RBI, MRI, TRI))
1548 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001549
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001550 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001551
1552 const unsigned DefReg = I.getOperand(0).getReg();
1553 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1554
1555 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1556 if (NewOpc == I.getOpcode())
1557 return false;
1558
1559 I.setDesc(TII.get(NewOpc));
 1560    // FIXME: Should the type always be reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001561
1562 // Now that we selected an opcode, we need to constrain the register
1563 // operands to use appropriate classes.
1564 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1565 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001566
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001567 case TargetOpcode::G_UADDO: {
1568 // TODO: Support other types.
1569 unsigned OpSize = Ty.getSizeInBits();
1570 if (OpSize != 32 && OpSize != 64) {
1571 LLVM_DEBUG(
1572 dbgs()
 1573           << "G_UADDO currently only supported for 32 and 64 bit types.\n");
1574 return false;
1575 }
1576
1577 // TODO: Support vectors.
1578 if (Ty.isVector()) {
1579 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1580 return false;
1581 }
1582
 1583    // Add the operands and set the condition flags.
1584 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1585 MachineIRBuilder MIRBuilder(I);
1586 auto AddsMI = MIRBuilder.buildInstr(
1587 AddsOpc, {I.getOperand(0).getReg()},
1588 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1589 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1590
1591 // Now, put the overflow result in the register given by the first operand
1592 // to the G_UADDO. CSINC increments the result when the predicate is false,
1593 // so to get the increment when it's true, we need to use the inverse. In
1594 // this case, we want to increment when carry is set.
1595 auto CsetMI = MIRBuilder
1596 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1597 {AArch64::WZR, AArch64::WZR})
1598 .addImm(getInvertedCondCode(AArch64CC::HS));
1599 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1600 I.eraseFromParent();
1601 return true;
1602 }
1603
Tim Northover398c5f52017-02-14 20:56:29 +00001604 case TargetOpcode::G_PTR_MASK: {
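    // G_PTR_MASK clears the low Align bits of the pointer, so select it as an
    // AND with the logical immediate ~((1 << Align) - 1).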
1605 uint64_t Align = I.getOperand(2).getImm();
1606 if (Align >= 64 || Align == 0)
1607 return false;
1608
1609 uint64_t Mask = ~((1ULL << Align) - 1);
1610 I.setDesc(TII.get(AArch64::ANDXri));
1611 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1612
1613 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1614 }
Tim Northover037af52c2016-10-31 18:31:09 +00001615 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001616 case TargetOpcode::G_TRUNC: {
1617 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1618 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1619
1620 const unsigned DstReg = I.getOperand(0).getReg();
1621 const unsigned SrcReg = I.getOperand(1).getReg();
1622
1623 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1624 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1625
1626 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001627 LLVM_DEBUG(
1628 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001629 return false;
1630 }
1631
1632 if (DstRB.getID() == AArch64::GPRRegBankID) {
1633 const TargetRegisterClass *DstRC =
1634 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1635 if (!DstRC)
1636 return false;
1637
1638 const TargetRegisterClass *SrcRC =
1639 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1640 if (!SrcRC)
1641 return false;
1642
1643 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1644 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001645 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001646 return false;
1647 }
1648
1649 if (DstRC == SrcRC) {
1650 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001651 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1652 SrcTy == LLT::scalar(64)) {
1653 llvm_unreachable("TableGen can import this case");
1654 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001655 } else if (DstRC == &AArch64::GPR32RegClass &&
1656 SrcRC == &AArch64::GPR64RegClass) {
1657 I.getOperand(1).setSubReg(AArch64::sub_32);
1658 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001659 LLVM_DEBUG(
1660 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001661 return false;
1662 }
1663
1664 I.setDesc(TII.get(TargetOpcode::COPY));
1665 return true;
1666 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1667 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1668 I.setDesc(TII.get(AArch64::XTNv4i16));
1669 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1670 return true;
1671 }
1672 }
1673
1674 return false;
1675 }
1676
Tim Northover3d38b3a2016-10-11 20:50:21 +00001677 case TargetOpcode::G_ANYEXT: {
1678 const unsigned DstReg = I.getOperand(0).getReg();
1679 const unsigned SrcReg = I.getOperand(1).getReg();
1680
Quentin Colombetcb629a82016-10-12 03:57:49 +00001681 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1682 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001683 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1684 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001685 return false;
1686 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001687
Quentin Colombetcb629a82016-10-12 03:57:49 +00001688 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1689 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001690 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1691 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001692 return false;
1693 }
1694
1695 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1696
1697 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001698 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001699 return false;
1700 }
1701
Quentin Colombetcb629a82016-10-12 03:57:49 +00001702 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001703 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1704 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001705 return false;
1706 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001707 // At this point G_ANYEXT is just like a plain COPY, but we need
 1708    // to explicitly form the 64-bit value when the destination is 64 bits wide.
1709 if (DstSize > 32) {
1710 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1711 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1712 .addDef(ExtSrc)
1713 .addImm(0)
1714 .addUse(SrcReg)
1715 .addImm(AArch64::sub_32);
1716 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001717 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001718 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001719 }
1720
1721 case TargetOpcode::G_ZEXT:
1722 case TargetOpcode::G_SEXT: {
1723 unsigned Opcode = I.getOpcode();
1724 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1725 SrcTy = MRI.getType(I.getOperand(1).getReg());
1726 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1727 const unsigned DefReg = I.getOperand(0).getReg();
1728 const unsigned SrcReg = I.getOperand(1).getReg();
1729 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1730
1731 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001732 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1733 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001734 return false;
1735 }
1736
1737 MachineInstr *ExtI;
1738 if (DstTy == LLT::scalar(64)) {
1739 // FIXME: Can we avoid manually doing this?
1740 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001741 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1742 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001743 return false;
1744 }
1745
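      // Widen the 32-bit source with SUBREG_TO_REG, then extend it into the
      // 64-bit destination with a single SBFM/UBFM covering the source width.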
1746 const unsigned SrcXReg =
1747 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1748 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1749 .addDef(SrcXReg)
1750 .addImm(0)
1751 .addUse(SrcReg)
1752 .addImm(AArch64::sub_32);
1753
1754 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1755 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1756 .addDef(DefReg)
1757 .addUse(SrcXReg)
1758 .addImm(0)
1759 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001760 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001761 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1762 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1763 .addDef(DefReg)
1764 .addUse(SrcReg)
1765 .addImm(0)
1766 .addImm(SrcTy.getSizeInBits() - 1);
1767 } else {
1768 return false;
1769 }
1770
1771 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1772
1773 I.eraseFromParent();
1774 return true;
1775 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001776
Tim Northover69271c62016-10-12 22:49:11 +00001777 case TargetOpcode::G_SITOFP:
1778 case TargetOpcode::G_UITOFP:
1779 case TargetOpcode::G_FPTOSI:
1780 case TargetOpcode::G_FPTOUI: {
1781 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1782 SrcTy = MRI.getType(I.getOperand(1).getReg());
1783 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1784 if (NewOpc == Opcode)
1785 return false;
1786
1787 I.setDesc(TII.get(NewOpc));
1788 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1789
1790 return true;
1791 }
1792
1793
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001794 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001795 // The importer is currently unable to import pointer types since they
1796 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001797 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001798
Daniel Sandersedd07842017-08-17 09:26:14 +00001799 case TargetOpcode::G_BITCAST:
1800 // Imported SelectionDAG rules can handle every bitcast except those that
1801 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00001802 // but we might not run an optimizer that deletes them. The other exception
1803 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1804 // of them.
1805 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00001806
Tim Northover9ac0eba2016-11-08 00:45:29 +00001807 case TargetOpcode::G_SELECT: {
1808 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001809 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1810 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001811 return false;
1812 }
1813
1814 const unsigned CondReg = I.getOperand(1).getReg();
1815 const unsigned TReg = I.getOperand(2).getReg();
1816 const unsigned FReg = I.getOperand(3).getReg();
1817
Jessica Paquette910630c2019-05-03 22:37:46 +00001818 // If we have a floating-point result, then we should use a floating point
1819 // select instead of an integer select.
1820 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
1821 AArch64::GPRRegBankID);
Tim Northover9ac0eba2016-11-08 00:45:29 +00001822
Amara Emersonc37ff0d2019-06-05 23:46:16 +00001823 if (IsFP && tryOptSelect(I))
1824 return true;
Tim Northover9ac0eba2016-11-08 00:45:29 +00001825
Amara Emersonc37ff0d2019-06-05 23:46:16 +00001826 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
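    // Test bit 0 of the condition with an ANDS against WZR, then pick between
    // the two values with a conditional select on the resulting NE flag.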
Tim Northover9ac0eba2016-11-08 00:45:29 +00001827 MachineInstr &TstMI =
1828 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1829 .addDef(AArch64::WZR)
1830 .addUse(CondReg)
1831 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1832
1833 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1834 .addDef(I.getOperand(0).getReg())
1835 .addUse(TReg)
1836 .addUse(FReg)
1837 .addImm(AArch64CC::NE);
1838
1839 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1840 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1841
1842 I.eraseFromParent();
1843 return true;
1844 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001845 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00001846 if (Ty.isVector())
1847 return selectVectorICmp(I, MRI);
1848
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001849 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001850 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1851 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001852 return false;
1853 }
1854
1855 unsigned CmpOpc = 0;
1856 unsigned ZReg = 0;
1857
Jessica Paquette49537bb2019-06-17 18:40:06 +00001858 // Check if this compare can be represented as a cmn, and perform any
1859 // necessary transformations to do so.
1860 if (tryOptCMN(I))
1861 return true;
1862
Tim Northover6c02ad52016-10-12 22:49:04 +00001863 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1864 if (CmpTy == LLT::scalar(32)) {
1865 CmpOpc = AArch64::SUBSWrr;
1866 ZReg = AArch64::WZR;
1867 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1868 CmpOpc = AArch64::SUBSXrr;
1869 ZReg = AArch64::XZR;
1870 } else {
1871 return false;
1872 }
1873
Amara Emerson0d209692019-06-09 07:31:25 +00001874 // Try to match immediate forms.
1875 auto ImmFns = selectArithImmed(I.getOperand(3));
1876 if (ImmFns)
1877 CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
1878
Amara Emerson0d209692019-06-09 07:31:25 +00001879 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1880 .addDef(ZReg)
1881 .addUse(I.getOperand(2).getReg());
1882
1883 // If we matched a valid constant immediate, add those operands.
1884 if (ImmFns) {
1885 for (auto &RenderFn : *ImmFns)
1886 RenderFn(CmpMI);
1887 } else {
1888 CmpMI.addUse(I.getOperand(3).getReg());
1889 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001890
Jessica Paquette49537bb2019-06-17 18:40:06 +00001891 MachineIRBuilder MIRBuilder(I);
1892 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
1893 MIRBuilder);
Amara Emerson0d209692019-06-09 07:31:25 +00001894 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover6c02ad52016-10-12 22:49:04 +00001895 I.eraseFromParent();
1896 return true;
1897 }
1898
Tim Northover7dd378d2016-10-12 22:49:07 +00001899 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001900 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001901 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1902 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001903 return false;
1904 }
1905
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001906 unsigned CmpOpc = selectFCMPOpc(I, MRI);
1907 if (!CmpOpc)
Tim Northover7dd378d2016-10-12 22:49:07 +00001908 return false;
Tim Northover7dd378d2016-10-12 22:49:07 +00001909
1910 // FIXME: regbank
1911
1912 AArch64CC::CondCode CC1, CC2;
1913 changeFCMPPredToAArch64CC(
1914 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1915
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001916 // Partially build the compare. Decide if we need to add a use for the
 1917    // third operand based on whether or not we're comparing against 0.0.
1918 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1919 .addUse(I.getOperand(2).getReg());
1920
1921 // If we don't have an immediate compare, then we need to add a use of the
1922 // register which wasn't used for the immediate.
1923 // Note that the immediate will always be the last operand.
1924 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
1925 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
Tim Northover7dd378d2016-10-12 22:49:07 +00001926
1927 const unsigned DefReg = I.getOperand(0).getReg();
1928 unsigned Def1Reg = DefReg;
1929 if (CC2 != AArch64CC::AL)
1930 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1931
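    // cset for the first condition; if the predicate maps to two AArch64
    // conditions, cset the second one as well and OR the results together below.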
1932 MachineInstr &CSetMI =
1933 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1934 .addDef(Def1Reg)
1935 .addUse(AArch64::WZR)
1936 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001937 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001938
1939 if (CC2 != AArch64CC::AL) {
1940 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1941 MachineInstr &CSet2MI =
1942 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1943 .addDef(Def2Reg)
1944 .addUse(AArch64::WZR)
1945 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001946 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001947 MachineInstr &OrMI =
1948 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1949 .addDef(DefReg)
1950 .addUse(Def1Reg)
1951 .addUse(Def2Reg);
1952 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1953 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1954 }
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001955 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover7dd378d2016-10-12 22:49:07 +00001956 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1957
1958 I.eraseFromParent();
1959 return true;
1960 }
Tim Northovere9600d82017-02-08 17:57:27 +00001961 case TargetOpcode::G_VASTART:
1962 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1963 : selectVaStartAAPCS(I, MF, MRI);
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00001964 case TargetOpcode::G_INTRINSIC:
1965 return selectIntrinsic(I, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001966 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001967 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001968 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001969 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001970 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1971 const unsigned DstReg = I.getOperand(0).getReg();
1972 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1973 const TargetRegisterClass *DstRC =
1974 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1975 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001976 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001977 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001978 case TargetOpcode::G_BLOCK_ADDR: {
1979 if (TM.getCodeModel() == CodeModel::Large) {
1980 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1981 I.eraseFromParent();
1982 return true;
1983 } else {
1984 I.setDesc(TII.get(AArch64::MOVaddrBA));
1985 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1986 I.getOperand(0).getReg())
1987 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1988 /* Offset */ 0, AArch64II::MO_PAGE)
1989 .addBlockAddress(
1990 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1991 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1992 I.eraseFromParent();
1993 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1994 }
1995 }
Jessica Paquette991cb392019-04-23 20:46:19 +00001996 case TargetOpcode::G_INTRINSIC_TRUNC:
1997 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00001998 case TargetOpcode::G_INTRINSIC_ROUND:
1999 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00002000 case TargetOpcode::G_BUILD_VECTOR:
2001 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002002 case TargetOpcode::G_MERGE_VALUES:
2003 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002004 case TargetOpcode::G_UNMERGE_VALUES:
2005 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002006 case TargetOpcode::G_SHUFFLE_VECTOR:
2007 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00002008 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2009 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002010 case TargetOpcode::G_INSERT_VECTOR_ELT:
2011 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00002012 case TargetOpcode::G_CONCAT_VECTORS:
2013 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002014 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002015
2016 return false;
2017}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002018
Jessica Paquette991cb392019-04-23 20:46:19 +00002019bool AArch64InstructionSelector::selectIntrinsicTrunc(
2020 MachineInstr &I, MachineRegisterInfo &MRI) const {
2021 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2022
2023 // Select the correct opcode.
2024 unsigned Opc = 0;
2025 if (!SrcTy.isVector()) {
2026 switch (SrcTy.getSizeInBits()) {
2027 default:
2028 case 16:
2029 Opc = AArch64::FRINTZHr;
2030 break;
2031 case 32:
2032 Opc = AArch64::FRINTZSr;
2033 break;
2034 case 64:
2035 Opc = AArch64::FRINTZDr;
2036 break;
2037 }
2038 } else {
2039 unsigned NumElts = SrcTy.getNumElements();
2040 switch (SrcTy.getElementType().getSizeInBits()) {
2041 default:
2042 break;
2043 case 16:
2044 if (NumElts == 4)
2045 Opc = AArch64::FRINTZv4f16;
2046 else if (NumElts == 8)
2047 Opc = AArch64::FRINTZv8f16;
2048 break;
2049 case 32:
2050 if (NumElts == 2)
2051 Opc = AArch64::FRINTZv2f32;
2052 else if (NumElts == 4)
2053 Opc = AArch64::FRINTZv4f32;
2054 break;
2055 case 64:
2056 if (NumElts == 2)
2057 Opc = AArch64::FRINTZv2f64;
2058 break;
2059 }
2060 }
2061
2062 if (!Opc) {
2063 // Didn't get an opcode above, bail.
2064 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2065 return false;
2066 }
2067
2068 // Legalization would have set us up perfectly for this; we just need to
2069 // set the opcode and move on.
2070 I.setDesc(TII.get(Opc));
2071 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2072}
2073
Jessica Paquette4fe75742019-04-23 23:03:03 +00002074bool AArch64InstructionSelector::selectIntrinsicRound(
2075 MachineInstr &I, MachineRegisterInfo &MRI) const {
2076 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2077
2078 // Select the correct opcode.
2079 unsigned Opc = 0;
2080 if (!SrcTy.isVector()) {
2081 switch (SrcTy.getSizeInBits()) {
2082 default:
2083 case 16:
2084 Opc = AArch64::FRINTAHr;
2085 break;
2086 case 32:
2087 Opc = AArch64::FRINTASr;
2088 break;
2089 case 64:
2090 Opc = AArch64::FRINTADr;
2091 break;
2092 }
2093 } else {
2094 unsigned NumElts = SrcTy.getNumElements();
2095 switch (SrcTy.getElementType().getSizeInBits()) {
2096 default:
2097 break;
2098 case 16:
2099 if (NumElts == 4)
2100 Opc = AArch64::FRINTAv4f16;
2101 else if (NumElts == 8)
2102 Opc = AArch64::FRINTAv8f16;
2103 break;
2104 case 32:
2105 if (NumElts == 2)
2106 Opc = AArch64::FRINTAv2f32;
2107 else if (NumElts == 4)
2108 Opc = AArch64::FRINTAv4f32;
2109 break;
2110 case 64:
2111 if (NumElts == 2)
2112 Opc = AArch64::FRINTAv2f64;
2113 break;
2114 }
2115 }
2116
2117 if (!Opc) {
2118 // Didn't get an opcode above, bail.
2119 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2120 return false;
2121 }
2122
2123 // Legalization would have set us up perfectly for this; we just need to
2124 // set the opcode and move on.
2125 I.setDesc(TII.get(Opc));
2126 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2127}
2128
Amara Emerson9bf092d2019-04-09 21:22:43 +00002129bool AArch64InstructionSelector::selectVectorICmp(
2130 MachineInstr &I, MachineRegisterInfo &MRI) const {
2131 unsigned DstReg = I.getOperand(0).getReg();
2132 LLT DstTy = MRI.getType(DstReg);
2133 unsigned SrcReg = I.getOperand(2).getReg();
2134 unsigned Src2Reg = I.getOperand(3).getReg();
2135 LLT SrcTy = MRI.getType(SrcReg);
2136
2137 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2138 unsigned NumElts = DstTy.getNumElements();
2139
2140 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2141 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2142 // Third index is cc opcode:
2143 // 0 == eq
2144 // 1 == ugt
2145 // 2 == uge
2146 // 3 == ult
2147 // 4 == ule
2148 // 5 == sgt
2149 // 6 == sge
2150 // 7 == slt
2151 // 8 == sle
2152 // ne is done by negating 'eq' result.
2153
 2154   // The table below assumes that for some comparisons the operands will be
2155 // commuted.
2156 // ult op == commute + ugt op
2157 // ule op == commute + uge op
2158 // slt op == commute + sgt op
2159 // sle op == commute + sge op
2160 unsigned PredIdx = 0;
2161 bool SwapOperands = false;
2162 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2163 switch (Pred) {
2164 case CmpInst::ICMP_NE:
2165 case CmpInst::ICMP_EQ:
2166 PredIdx = 0;
2167 break;
2168 case CmpInst::ICMP_UGT:
2169 PredIdx = 1;
2170 break;
2171 case CmpInst::ICMP_UGE:
2172 PredIdx = 2;
2173 break;
2174 case CmpInst::ICMP_ULT:
2175 PredIdx = 3;
2176 SwapOperands = true;
2177 break;
2178 case CmpInst::ICMP_ULE:
2179 PredIdx = 4;
2180 SwapOperands = true;
2181 break;
2182 case CmpInst::ICMP_SGT:
2183 PredIdx = 5;
2184 break;
2185 case CmpInst::ICMP_SGE:
2186 PredIdx = 6;
2187 break;
2188 case CmpInst::ICMP_SLT:
2189 PredIdx = 7;
2190 SwapOperands = true;
2191 break;
2192 case CmpInst::ICMP_SLE:
2193 PredIdx = 8;
2194 SwapOperands = true;
2195 break;
2196 default:
2197 llvm_unreachable("Unhandled icmp predicate");
2198 return false;
2199 }
2200
2201 // This table obviously should be tablegen'd when we have our GISel native
2202 // tablegen selector.
2203
2204 static const unsigned OpcTable[4][4][9] = {
2205 {
2206 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2207 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2208 0 /* invalid */},
2209 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2210 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2211 0 /* invalid */},
2212 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2213 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2214 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2215 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2216 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2217 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2218 },
2219 {
2220 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2221 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2222 0 /* invalid */},
2223 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2224 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2225 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2226 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2227 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2228 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2229 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2230 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2231 0 /* invalid */}
2232 },
2233 {
2234 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2235 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2236 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2237 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2238 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2239 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2240 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2241 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2242 0 /* invalid */},
2243 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2244 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2245 0 /* invalid */}
2246 },
2247 {
2248 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2249 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2250 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2251 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2252 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2253 0 /* invalid */},
2254 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2255 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2256 0 /* invalid */},
2257 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2258 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2259 0 /* invalid */}
2260 },
2261 };
2262 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2263 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2264 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2265 if (!Opc) {
2266 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2267 return false;
2268 }
2269
2270 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2271 const TargetRegisterClass *SrcRC =
2272 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2273 if (!SrcRC) {
2274 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2275 return false;
2276 }
2277
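  // 'ne' is selected as 'eq' followed by a vector NOT, sized to match the
  // vector width.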
2278 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2279 if (SrcTy.getSizeInBits() == 128)
2280 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2281
2282 if (SwapOperands)
2283 std::swap(SrcReg, Src2Reg);
2284
2285 MachineIRBuilder MIB(I);
2286 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2287 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2288
2289 // Invert if we had a 'ne' cc.
2290 if (NotOpc) {
2291 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2292 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2293 } else {
2294 MIB.buildCopy(DstReg, Cmp.getReg(0));
2295 }
2296 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2297 I.eraseFromParent();
2298 return true;
2299}
2300
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002301MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00002302 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002303 MachineIRBuilder &MIRBuilder) const {
2304 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002305
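  // Insert the scalar into the low lane of an undef vector via INSERT_SUBREG,
  // choosing the subregister index from the element size.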
2306 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002307 auto Ins =
2308 MIRBuilder
2309 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2310 .addImm(SubregIndex);
2311 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2312 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2313 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002314 };
2315
Amara Emerson8acb0d92019-03-04 19:16:00 +00002316 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002317 case 16:
2318 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002319 case 32:
2320 return BuildFn(AArch64::ssub);
2321 case 64:
2322 return BuildFn(AArch64::dsub);
2323 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002324 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002325 }
2326}
2327
Amara Emerson8cb186c2018-12-20 01:11:04 +00002328bool AArch64InstructionSelector::selectMergeValues(
2329 MachineInstr &I, MachineRegisterInfo &MRI) const {
2330 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2331 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2332 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2333 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2334
2335 // At the moment we only support merging two s32s into an s64.
2336 if (I.getNumOperands() != 3)
2337 return false;
2338 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2339 return false;
2340 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2341 if (RB.getID() != AArch64::GPRRegBankID)
2342 return false;
2343
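  // Widen both s32 sources with SUBREG_TO_REG, then use a BFM to insert the
  // second value into bits [63:32] of the first.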
2344 auto *DstRC = &AArch64::GPR64RegClass;
2345 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
2346 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2347 TII.get(TargetOpcode::SUBREG_TO_REG))
2348 .addDef(SubToRegDef)
2349 .addImm(0)
2350 .addUse(I.getOperand(1).getReg())
2351 .addImm(AArch64::sub_32);
2352 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
 2353   // Need to anyext the second scalar before we can use BFM.
2354 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2355 TII.get(TargetOpcode::SUBREG_TO_REG))
2356 .addDef(SubToRegDef2)
2357 .addImm(0)
2358 .addUse(I.getOperand(2).getReg())
2359 .addImm(AArch64::sub_32);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002360 MachineInstr &BFM =
2361 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002362 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002363 .addUse(SubToRegDef)
2364 .addUse(SubToRegDef2)
2365 .addImm(32)
2366 .addImm(31);
2367 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2368 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2369 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2370 I.eraseFromParent();
2371 return true;
2372}
2373
Jessica Paquette607774c2019-03-11 22:18:01 +00002374static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2375 const unsigned EltSize) {
 2376   // Choose a lane copy opcode and subregister based on the size of the
2377 // vector's elements.
2378 switch (EltSize) {
2379 case 16:
2380 CopyOpc = AArch64::CPYi16;
2381 ExtractSubReg = AArch64::hsub;
2382 break;
2383 case 32:
2384 CopyOpc = AArch64::CPYi32;
2385 ExtractSubReg = AArch64::ssub;
2386 break;
2387 case 64:
2388 CopyOpc = AArch64::CPYi64;
2389 ExtractSubReg = AArch64::dsub;
2390 break;
2391 default:
2392 // Unknown size, bail out.
2393 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2394 return false;
2395 }
2396 return true;
2397}
2398
Amara Emersond61b89b2019-03-14 22:48:18 +00002399MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2400 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2401 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2402 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2403 unsigned CopyOpc = 0;
2404 unsigned ExtractSubReg = 0;
2405 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2406 LLVM_DEBUG(
2407 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2408 return nullptr;
2409 }
2410
2411 const TargetRegisterClass *DstRC =
2412 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2413 if (!DstRC) {
2414 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2415 return nullptr;
2416 }
2417
2418 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2419 const LLT &VecTy = MRI.getType(VecReg);
2420 const TargetRegisterClass *VecRC =
2421 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2422 if (!VecRC) {
2423 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2424 return nullptr;
2425 }
2426
 2427   // The register that the lane copy will read from.
2428 unsigned InsertReg = VecReg;
2429 if (!DstReg)
2430 DstReg = MRI.createVirtualRegister(DstRC);
2431 // If the lane index is 0, we just use a subregister COPY.
2432 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002433 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2434 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002435 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002436 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002437 }
2438
2439 // Lane copies require 128-bit wide registers. If we're dealing with an
2440 // unpacked vector, then we need to move up to that width. Insert an implicit
2441 // def and a subregister insert to get us there.
2442 if (VecTy.getSizeInBits() != 128) {
2443 MachineInstr *ScalarToVector = emitScalarToVector(
2444 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2445 if (!ScalarToVector)
2446 return nullptr;
2447 InsertReg = ScalarToVector->getOperand(0).getReg();
2448 }
2449
2450 MachineInstr *LaneCopyMI =
2451 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2452 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2453
2454 // Make sure that we actually constrain the initial copy.
2455 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2456 return LaneCopyMI;
2457}
2458
Jessica Paquette607774c2019-03-11 22:18:01 +00002459bool AArch64InstructionSelector::selectExtractElt(
2460 MachineInstr &I, MachineRegisterInfo &MRI) const {
2461 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2462 "unexpected opcode!");
2463 unsigned DstReg = I.getOperand(0).getReg();
2464 const LLT NarrowTy = MRI.getType(DstReg);
2465 const unsigned SrcReg = I.getOperand(1).getReg();
2466 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002467 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002468 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2469 "source register size too small!");
2470 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2471
2472 // Need the lane index to determine the correct copy opcode.
2473 MachineOperand &LaneIdxOp = I.getOperand(2);
2474 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2475
2476 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2477 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2478 return false;
2479 }
2480
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002481 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002482 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2483 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002484 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002485 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002486
Jessica Paquette607774c2019-03-11 22:18:01 +00002487 MachineIRBuilder MIRBuilder(I);
2488
Amara Emersond61b89b2019-03-14 22:48:18 +00002489 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2490 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2491 LaneIdx, MIRBuilder);
2492 if (!Extract)
2493 return false;
2494
2495 I.eraseFromParent();
2496 return true;
2497}
2498
2499bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2500 MachineInstr &I, MachineRegisterInfo &MRI) const {
2501 unsigned NumElts = I.getNumOperands() - 1;
2502 unsigned SrcReg = I.getOperand(NumElts).getReg();
2503 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2504 const LLT SrcTy = MRI.getType(SrcReg);
2505
2506 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2507 if (SrcTy.getSizeInBits() > 128) {
2508 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2509 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002510 }
2511
Amara Emersond61b89b2019-03-14 22:48:18 +00002512 MachineIRBuilder MIB(I);
2513
2514 // We implement a split vector operation by treating the sub-vectors as
2515 // scalars and extracting them.
2516 const RegisterBank &DstRB =
2517 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2518 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2519 unsigned Dst = I.getOperand(OpIdx).getReg();
2520 MachineInstr *Extract =
2521 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2522 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002523 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002524 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002525 I.eraseFromParent();
2526 return true;
2527}
2528
Jessica Paquette245047d2019-01-24 22:00:41 +00002529bool AArch64InstructionSelector::selectUnmergeValues(
2530 MachineInstr &I, MachineRegisterInfo &MRI) const {
2531 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2532 "unexpected opcode");
2533
2534 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2535 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2536 AArch64::FPRRegBankID ||
2537 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2538 AArch64::FPRRegBankID) {
2539 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2540 "currently unsupported.\n");
2541 return false;
2542 }
2543
2544 // The last operand is the vector source register, and every other operand is
2545 // a register to unpack into.
2546 unsigned NumElts = I.getNumOperands() - 1;
2547 unsigned SrcReg = I.getOperand(NumElts).getReg();
2548 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2549 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002550 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002551 assert(WideTy.isVector() && "can only unmerge from vector types!");
2552 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2553 "source register size too small!");
2554
Amara Emersond61b89b2019-03-14 22:48:18 +00002555 if (!NarrowTy.isScalar())
2556 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002557
Amara Emerson3739a202019-03-15 21:59:50 +00002558 MachineIRBuilder MIB(I);
2559
Jessica Paquette245047d2019-01-24 22:00:41 +00002560 // Choose a lane copy opcode and subregister based off of the size of the
2561 // vector's elements.
2562 unsigned CopyOpc = 0;
2563 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002564 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002565 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002566
2567 // Set up for the lane copies.
2568 MachineBasicBlock &MBB = *I.getParent();
2569
2570 // Stores the registers we'll be copying from.
2571 SmallVector<unsigned, 4> InsertRegs;
2572
2573 // We'll use the first register twice, so we only need NumElts-1 registers.
2574 unsigned NumInsertRegs = NumElts - 1;
2575
2576 // If our elements fit into exactly 128 bits, then we can copy from the source
2577 // directly. Otherwise, we need to do a bit of setup with some subregister
2578 // inserts.
2579 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2580 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2581 } else {
2582 // No. We have to perform subregister inserts. For each insert, create an
2583 // implicit def and a subregister insert, and save the register we create.
2584 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2585 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2586 MachineInstr &ImpDefMI =
2587 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2588 ImpDefReg);
2589
2590 // Now, create the subregister insert from SrcReg.
2591 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2592 MachineInstr &InsMI =
2593 *BuildMI(MBB, I, I.getDebugLoc(),
2594 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2595 .addUse(ImpDefReg)
2596 .addUse(SrcReg)
2597 .addImm(AArch64::dsub);
2598
2599 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2600 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2601
2602 // Save the register so that we can copy from it after.
2603 InsertRegs.push_back(InsertReg);
2604 }
2605 }
2606
2607 // Now that we've created any necessary subregister inserts, we can
2608 // create the copies.
2609 //
2610 // Perform the first copy separately as a subregister copy.
2611 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002612 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2613 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002614 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002615
2616 // Now, perform the remaining copies as vector lane copies.
2617 unsigned LaneIdx = 1;
2618 for (unsigned InsReg : InsertRegs) {
2619 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2620 MachineInstr &CopyInst =
2621 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2622 .addUse(InsReg)
2623 .addImm(LaneIdx);
2624 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2625 ++LaneIdx;
2626 }
2627
2628 // Separately constrain the first copy's destination. Because of the
2629 // limitation in constrainOperandRegClass, we can't guarantee that this will
2630 // actually be constrained. So, do it ourselves using the second operand.
2631 const TargetRegisterClass *RC =
2632 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2633 if (!RC) {
2634 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2635 return false;
2636 }
2637
2638 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2639 I.eraseFromParent();
2640 return true;
2641}
2642
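/// Select a G_CONCAT_VECTORS instruction by delegating to emitVectorConcat.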
Amara Emerson2ff22982019-03-14 22:48:15 +00002643bool AArch64InstructionSelector::selectConcatVectors(
2644 MachineInstr &I, MachineRegisterInfo &MRI) const {
2645 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2646 "Unexpected opcode");
2647 unsigned Dst = I.getOperand(0).getReg();
2648 unsigned Op1 = I.getOperand(1).getReg();
2649 unsigned Op2 = I.getOperand(2).getReg();
2650 MachineIRBuilder MIRBuilder(I);
2651 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2652 if (!ConcatMI)
2653 return false;
2654 I.eraseFromParent();
2655 return true;
2656}
2657
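/// Collect the constant lane indices from the G_BUILD_VECTOR defining a
/// G_SHUFFLE_VECTOR's mask operand, recording None for undef elements.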
Amara Emerson1abe05c2019-02-21 20:20:16 +00002658void AArch64InstructionSelector::collectShuffleMaskIndices(
2659 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00002660 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00002661 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2662 assert(
2663 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2664 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2665 // Find the constant indices.
2666 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2667 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2668 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2669 // Look through copies.
2670 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2671 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2672 assert(ScalarDef && "Could not find def of copy operand");
2673 }
Amara Emerson2806fd02019-04-12 21:31:21 +00002674 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2675      // This must be an undef if it's not a constant.
2676 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2677 Idxs.push_back(None);
2678 } else {
2679 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2680 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002681 }
2682}
2683
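/// Add \p CPVal to \p MF's constant pool and return the resulting index.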
2684unsigned
2685AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2686 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00002687 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00002688 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2689 if (Align == 0)
2690 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2691
2692 MachineConstantPool *MCP = MF.getConstantPool();
2693 return MCP->getConstantPoolIndex(CPVal, Align);
2694}
2695
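/// Materialize \p CPVal from the constant pool using an ADRP + scaled load.
/// Returns the load instruction, or nullptr if the type's store size is
/// unsupported.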
2696MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2697 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2698 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2699
2700 auto Adrp =
2701 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2702 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002703
2704 MachineInstr *LoadMI = nullptr;
2705 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2706 case 16:
2707 LoadMI =
2708 &*MIRBuilder
2709 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2710 .addConstantPoolIndex(CPIdx, 0,
2711 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2712 break;
2713 case 8:
2714 LoadMI = &*MIRBuilder
2715 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2716 .addConstantPoolIndex(
2717 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2718 break;
2719 default:
2720 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2721 << *CPVal->getType());
2722 return nullptr;
2723 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002724 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002725 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2726 return LoadMI;
2727}
2728
2729/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2730/// size and RB.
2731static std::pair<unsigned, unsigned>
2732getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2733 unsigned Opc, SubregIdx;
2734 if (RB.getID() == AArch64::GPRRegBankID) {
2735 if (EltSize == 32) {
2736 Opc = AArch64::INSvi32gpr;
2737 SubregIdx = AArch64::ssub;
2738 } else if (EltSize == 64) {
2739 Opc = AArch64::INSvi64gpr;
2740 SubregIdx = AArch64::dsub;
2741 } else {
2742 llvm_unreachable("invalid elt size!");
2743 }
2744 } else {
2745 if (EltSize == 8) {
2746 Opc = AArch64::INSvi8lane;
2747 SubregIdx = AArch64::bsub;
2748 } else if (EltSize == 16) {
2749 Opc = AArch64::INSvi16lane;
2750 SubregIdx = AArch64::hsub;
2751 } else if (EltSize == 32) {
2752 Opc = AArch64::INSvi32lane;
2753 SubregIdx = AArch64::ssub;
2754 } else if (EltSize == 64) {
2755 Opc = AArch64::INSvi64lane;
2756 SubregIdx = AArch64::dsub;
2757 } else {
2758 llvm_unreachable("invalid elt size!");
2759 }
2760 }
2761 return std::make_pair(Opc, SubregIdx);
2762}
2763
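/// Concatenate two 64-bit vectors \p Op1 and \p Op2 into a 128-bit register.
/// If \p Dst is not provided, a new virtual register is created for the
/// result. Returns the final insert instruction, or nullptr on failure.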
2764MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002765 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2766 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002767 // We implement a vector concat by:
2768 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2769 // 2. Insert the upper vector into the destination's upper element
2770 // TODO: some of this code is common with G_BUILD_VECTOR handling.
2771 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2772
2773 const LLT Op1Ty = MRI.getType(Op1);
2774 const LLT Op2Ty = MRI.getType(Op2);
2775
2776 if (Op1Ty != Op2Ty) {
2777 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2778 return nullptr;
2779 }
2780 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2781
2782 if (Op1Ty.getSizeInBits() >= 128) {
2783 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2784 return nullptr;
2785 }
2786
2787 // At the moment we just support 64 bit vector concats.
2788 if (Op1Ty.getSizeInBits() != 64) {
2789    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2790 return nullptr;
2791 }
2792
2793 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2794 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2795 const TargetRegisterClass *DstRC =
2796 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2797
2798 MachineInstr *WidenedOp1 =
2799 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2800 MachineInstr *WidenedOp2 =
2801 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2802 if (!WidenedOp1 || !WidenedOp2) {
2803 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2804 return nullptr;
2805 }
2806
2807 // Now do the insert of the upper element.
2808 unsigned InsertOpc, InsSubRegIdx;
2809 std::tie(InsertOpc, InsSubRegIdx) =
2810 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2811
Amara Emerson2ff22982019-03-14 22:48:15 +00002812 if (!Dst)
2813 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002814 auto InsElt =
2815 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002816 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002817 .addImm(1) /* Lane index */
2818 .addUse(WidenedOp2->getOperand(0).getReg())
2819 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002820 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2821 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002822}
2823
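/// Try to rewrite a G_FCONSTANT in place as an FMOVSi/FMOVDi when the
/// immediate has a valid 8-bit FP encoding. Returns nullptr otherwise.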
Jessica Paquettea3843fe2019-05-01 22:39:43 +00002824MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
2825 MachineInstr &I, MachineRegisterInfo &MRI) const {
2826 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
2827 "Expected a G_FCONSTANT!");
2828 MachineOperand &ImmOp = I.getOperand(1);
2829 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
2830
2831 // Only handle 32 and 64 bit defs for now.
2832 if (DefSize != 32 && DefSize != 64)
2833 return nullptr;
2834
2835 // Don't handle null values using FMOV.
2836 if (ImmOp.getFPImm()->isNullValue())
2837 return nullptr;
2838
2839 // Get the immediate representation for the FMOV.
2840 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
2841 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
2842 : AArch64_AM::getFP64Imm(ImmValAPF);
2843
2844 // If this is -1, it means the immediate can't be represented as the requested
2845 // floating point value. Bail.
2846 if (Imm == -1)
2847 return nullptr;
2848
2849 // Update MI to represent the new FMOV instruction, constrain it, and return.
2850 ImmOp.ChangeToImmediate(Imm);
2851 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
2852 I.setDesc(TII.get(MovOpc));
2853 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2854 return &I;
2855}
2856
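/// Emit a 'cset' (CSINC of WZR/WZR) into \p DefReg, producing the boolean
/// result of an integer compare with predicate \p Pred.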
Jessica Paquette49537bb2019-06-17 18:40:06 +00002857MachineInstr *
2858AArch64InstructionSelector::emitCSetForICMP(unsigned DefReg, unsigned Pred,
2859 MachineIRBuilder &MIRBuilder) const {
2860 // CSINC increments the result when the predicate is false. Invert it.
2861 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2862 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
2863 auto I =
2864 MIRBuilder
2865 .buildInstr(AArch64::CSINCWr, {DefReg}, {AArch64::WZR, AArch64::WZR})
2866 .addImm(InvCC);
2867 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
2868 return &*I;
2869}
2870
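/// Try to fold a single-use G_FCMP feeding a G_SELECT into an FCMP + FCSEL
/// pair, avoiding the longer generic compare and select expansions.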
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002871bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
2872 MachineIRBuilder MIB(I);
2873 MachineRegisterInfo &MRI = *MIB.getMRI();
2874 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2875
2876 // We want to recognize this pattern:
2877 //
2878 // $z = G_FCMP pred, $x, $y
2879 // ...
2880 // $w = G_SELECT $z, $a, $b
2881 //
2882 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
2883  // some copies/truncs in between).
2884 //
2885 // If we see this, then we can emit something like this:
2886 //
2887 // fcmp $x, $y
2888 // fcsel $w, $a, $b, pred
2889 //
2890 // Rather than emitting both of the rather long sequences in the standard
2891 // G_FCMP/G_SELECT select methods.
2892
2893 // First, check if the condition is defined by a compare.
2894 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
2895 while (CondDef) {
2896 // We can only fold if all of the defs have one use.
2897 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
2898 return false;
2899
2900 // We can skip over G_TRUNC since the condition is 1-bit.
2901 // Truncating/extending can have no impact on the value.
2902 unsigned Opc = CondDef->getOpcode();
2903 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
2904 break;
2905
Amara Emersond940e202019-06-06 07:33:47 +00002906 // Can't see past copies from physregs.
2907 if (Opc == TargetOpcode::COPY &&
2908 TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
2909 return false;
2910
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002911 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
2912 }
2913
2914 // Is the condition defined by a compare?
2915 // TODO: Handle G_ICMP.
2916 if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
2917 return false;
2918
2919 // Get the condition code for the select.
2920 AArch64CC::CondCode CondCode;
2921 AArch64CC::CondCode CondCode2;
2922 changeFCMPPredToAArch64CC(
2923 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
2924 CondCode2);
2925
2926 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
2927 // instructions to emit the comparison.
2928 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
2929 // unnecessary.
2930 if (CondCode2 != AArch64CC::AL)
2931 return false;
2932
2933 // Make sure we'll be able to select the compare.
2934 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
2935 if (!CmpOpc)
2936 return false;
2937
2938 // Emit a new compare.
2939 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
2940 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2941 Cmp.addUse(CondDef->getOperand(3).getReg());
2942
2943 // Emit the select.
2944 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2945 auto CSel =
2946 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
2947 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
2948 .addImm(CondCode);
2949 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2950 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
2951 I.eraseFromParent();
2952 return true;
2953}
2954
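/// Try to fold a 'G_SUB 0, y' feeding a G_ICMP into a CMN (an ADDS against
/// the zero register), followed by a cset of the result.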
Jessica Paquette49537bb2019-06-17 18:40:06 +00002955bool AArch64InstructionSelector::tryOptCMN(MachineInstr &I) const {
2956 assert(I.getOpcode() == TargetOpcode::G_ICMP && "Expected G_ICMP");
2957 MachineIRBuilder MIRBuilder(I);
2958 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2959 // We want to find this sort of thing:
2960 // x = G_SUB 0, y
2961 // G_ICMP z, x
2962 //
2963 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
2964 // e.g:
2965 //
2966 // cmn z, y
2967
2968 // Helper lambda to find the def.
2969 auto FindDef = [&](unsigned VReg) {
2970 MachineInstr *Def = MRI.getVRegDef(VReg);
2971 while (Def) {
2972 if (Def->getOpcode() != TargetOpcode::COPY)
2973 break;
2974 // Copies can be from physical registers. If we hit this, we're done.
2975 if (TargetRegisterInfo::isPhysicalRegister(Def->getOperand(1).getReg()))
2976 break;
2977 Def = MRI.getVRegDef(Def->getOperand(1).getReg());
2978 }
2979 return Def;
2980 };
2981
2982 // Helper lambda to detect the subtract followed by the compare.
2983 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
2984 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
2985 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
2986 return false;
2987
2988 // Need to make sure NZCV is the same at the end of the transformation.
2989 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
2990 return false;
2991
2996 // Make sure that we're getting
2997 // x = G_SUB 0, y
2998 auto ValAndVReg =
2999 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3000 if (!ValAndVReg || ValAndVReg->Value != 0)
3001 return false;
3002
3003 // This can safely be represented as a CMN.
3004 return true;
3005 };
3006
3007 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3008 MachineInstr *LHSDef = FindDef(I.getOperand(2).getReg());
3009 MachineInstr *RHSDef = FindDef(I.getOperand(3).getReg());
3010 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
3011 (CmpInst::Predicate)I.getOperand(1).getPredicate());
3012 bool DidFold = false;
3013 if (IsCMN(LHSDef, CC)) {
3014 // We're doing this:
3015 //
3016 // Given:
3017 //
3018 // x = G_SUB 0, y
3019 // G_ICMP x, z
3020 //
3021 // Update the G_ICMP:
3022 //
3023 // G_ICMP y, z
3024 I.getOperand(2).setReg(LHSDef->getOperand(2).getReg());
3025 DidFold = true;
3026 } else if (IsCMN(RHSDef, CC)) {
3027 // Same idea here, but with the RHS of the compare instead:
3028 //
3029 // Given:
3030 //
3031 // x = G_SUB 0, y
3032 // G_ICMP z, x
3033 //
3034 // Update the G_ICMP:
3035 //
3036 // G_ICMP z, y
3037 I.getOperand(3).setReg(RHSDef->getOperand(2).getReg());
3038 DidFold = true;
3039 }
3040
3041 if (DidFold) {
3042 // We can fold. Emit a CMN.
3043 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3044 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3045 bool Is32Bit =
3046 (MRI.getType(I.getOperand(2).getReg()).getSizeInBits() == 32);
3047 auto ImmFns = selectArithImmed(I.getOperand(3));
3048 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3049 unsigned ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3050
3051 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {I.getOperand(2).getReg()});
3052
3053 // If we matched a valid constant immediate, add those operands.
3054 if (ImmFns) {
3055 for (auto &RenderFn : *ImmFns)
3056 RenderFn(CmpMI);
3057 } else {
3058 CmpMI.addUse(I.getOperand(3).getReg());
3059 }
3060
3061 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3062
3063 // Add a CSet after the CMN.
3064 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
3065 MIRBuilder);
3066 I.eraseFromParent();
3067 }
3068
3069 return DidFold;
3070}
3071
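/// Try to match a G_SHUFFLE_VECTOR splat of a scalar inserted at lane 0 and
/// select a DUP instruction for it.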
Amara Emerson761ca2e2019-03-19 21:43:05 +00003072bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3073 // Try to match a vector splat operation into a dup instruction.
3074 // We're looking for this pattern:
3075 // %scalar:gpr(s64) = COPY $x0
3076 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3077 // %cst0:gpr(s32) = G_CONSTANT i32 0
3078 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3079 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3080 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3081 // %zerovec(<2 x s32>)
3082 //
3083 // ...into:
3084 // %splat = DUP %scalar
3085 // We use the regbank of the scalar to determine which kind of dup to use.
3086 MachineIRBuilder MIB(I);
3087 MachineRegisterInfo &MRI = *MIB.getMRI();
3088 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3089 using namespace TargetOpcode;
3090 using namespace MIPatternMatch;
3091
3092 // Begin matching the insert.
3093 auto *InsMI =
3094 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
3095 if (!InsMI)
3096 return false;
3097 // Match the undef vector operand.
3098 auto *UndefMI =
3099 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
3100 if (!UndefMI)
3101 return false;
3102 // Match the scalar being splatted.
3103 unsigned ScalarReg = InsMI->getOperand(2).getReg();
3104 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3105 // Match the index constant 0.
3106 int64_t Index = 0;
3107 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3108 return false;
3109
3110 // The shuffle's second operand doesn't matter if the mask is all zero.
3111 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
3112 if (!ZeroVec)
3113 return false;
3114 int64_t Zero = 0;
3115 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3116 return false;
3117 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
3118 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3119 return false; // This wasn't an all zeros vector.
3120 }
3121
3122 // We're done, now find out what kind of splat we need.
3123 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3124 LLT EltTy = VecTy.getElementType();
3125 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3126    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern: need a 128b vector with 32b+ elements");
3127 return false;
3128 }
3129 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3130 static const unsigned OpcTable[2][2] = {
3131 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3132 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3133 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3134
3135 // For FP splats, we need to widen the scalar reg via undef too.
3136 if (IsFP) {
3137 MachineInstr *Widen = emitScalarToVector(
3138 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3139 if (!Widen)
3140 return false;
3141 ScalarReg = Widen->getOperand(0).getReg();
3142 }
3143 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3144 if (IsFP)
3145 Dup.addImm(0);
3146 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3147 I.eraseFromParent();
3148 return true;
3149}
3150
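/// Try shuffle-specific optimizations (currently only the splat-to-DUP fold)
/// before falling back to the generic TBL-based selection.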
3151bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3152 if (TM.getOptLevel() == CodeGenOpt::None)
3153 return false;
3154 if (tryOptVectorDup(I))
3155 return true;
3156 return false;
3157}
3158
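/// Select a G_SHUFFLE_VECTOR by materializing the mask in the constant pool
/// and emitting a TBL1 (64-bit destination) or TBL2 (128-bit destination).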
Amara Emerson1abe05c2019-02-21 20:20:16 +00003159bool AArch64InstructionSelector::selectShuffleVector(
3160 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003161 if (tryOptVectorShuffle(I))
3162 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003163 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3164 unsigned Src1Reg = I.getOperand(1).getReg();
3165 const LLT Src1Ty = MRI.getType(Src1Reg);
3166 unsigned Src2Reg = I.getOperand(2).getReg();
3167 const LLT Src2Ty = MRI.getType(Src2Reg);
3168
3169 MachineBasicBlock &MBB = *I.getParent();
3170 MachineFunction &MF = *MBB.getParent();
3171 LLVMContext &Ctx = MF.getFunction().getContext();
3172
3173 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3174  // operand; it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00003175 // find the mask indices. If the mask element is undef, then
3176 // collectShuffleMaskIndices() will add a None entry for that index into
3177 // the list.
3178 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003179 collectShuffleMaskIndices(I, MRI, Mask);
3180 assert(!Mask.empty() && "Expected to find mask indices");
3181
3182 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3183  // it originated from a <1 x T> type. Those should have been lowered into
3184 // G_BUILD_VECTOR earlier.
3185 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3186 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3187 return false;
3188 }
3189
3190 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3191
3192 SmallVector<Constant *, 64> CstIdxs;
Amara Emerson2806fd02019-04-12 21:31:21 +00003193 for (auto &MaybeVal : Mask) {
3194    // For now, we'll just assume any undef indexes are 0. This should be
3195    // optimized in the future, e.g. to select DUP.
3196 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003197 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3198 unsigned Offset = Byte + Val * BytesPerElt;
3199 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3200 }
3201 }
3202
Amara Emerson8acb0d92019-03-04 19:16:00 +00003203 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003204
3205 // Use a constant pool to load the index vector for TBL.
3206 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003207 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3208 if (!IndexLoad) {
3209 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3210 return false;
3211 }
3212
Amara Emerson8acb0d92019-03-04 19:16:00 +00003213 if (DstTy.getSizeInBits() != 128) {
3214 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3215 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00003216 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003217 if (!Concat) {
3218 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3219 return false;
3220 }
3221
3222    // The constant pool load will be 64 bits, so we need to convert to FPR128.
3223 IndexLoad =
3224 emitScalarToVector(64, &AArch64::FPR128RegClass,
3225 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3226
3227 auto TBL1 = MIRBuilder.buildInstr(
3228 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3229 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3230 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3231
Amara Emerson3739a202019-03-15 21:59:50 +00003232 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00003233 MIRBuilder
3234 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3235 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003236 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3237 I.eraseFromParent();
3238 return true;
3239 }
3240
Amara Emerson1abe05c2019-02-21 20:20:16 +00003241 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3242 // Q registers for regalloc.
3243 auto RegSeq = MIRBuilder
3244 .buildInstr(TargetOpcode::REG_SEQUENCE,
3245 {&AArch64::QQRegClass}, {Src1Reg})
3246 .addImm(AArch64::qsub0)
3247 .addUse(Src2Reg)
3248 .addImm(AArch64::qsub1);
3249
3250 auto TBL2 =
3251 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3252 {RegSeq, IndexLoad->getOperand(0).getReg()});
3253 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3254 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3255 I.eraseFromParent();
3256 return true;
3257}
3258
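/// Emit an INS of \p EltReg into lane \p LaneIdx of \p SrcReg. If \p DstReg
/// is not provided, a new FPR128 register is created for the result.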
Jessica Paquette16d67a32019-03-13 23:22:23 +00003259MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3260 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
3261 unsigned LaneIdx, const RegisterBank &RB,
3262 MachineIRBuilder &MIRBuilder) const {
3263 MachineInstr *InsElt = nullptr;
3264 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3265 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3266
3267 // Create a register to define with the insert if one wasn't passed in.
3268 if (!DstReg)
3269 DstReg = MRI.createVirtualRegister(DstRC);
3270
3271 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3272 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3273
3274 if (RB.getID() == AArch64::FPRRegBankID) {
3275 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3276 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3277 .addImm(LaneIdx)
3278 .addUse(InsSub->getOperand(0).getReg())
3279 .addImm(0);
3280 } else {
3281 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3282 .addImm(LaneIdx)
3283 .addUse(EltReg);
3284 }
3285
3286 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3287 return InsElt;
3288}
3289
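/// Select a G_INSERT_VECTOR_ELT with a constant lane index by widening the
/// vector to 128 bits if necessary, performing a lane insert, and narrowing
/// the result back down.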
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003290bool AArch64InstructionSelector::selectInsertElt(
3291 MachineInstr &I, MachineRegisterInfo &MRI) const {
3292 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3293
3294 // Get information on the destination.
3295 unsigned DstReg = I.getOperand(0).getReg();
3296 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003297 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003298
3299 // Get information on the element we want to insert into the destination.
3300 unsigned EltReg = I.getOperand(2).getReg();
3301 const LLT EltTy = MRI.getType(EltReg);
3302 unsigned EltSize = EltTy.getSizeInBits();
3303 if (EltSize < 16 || EltSize > 64)
3304 return false; // Don't support all element types yet.
3305
3306 // Find the definition of the index. Bail out if it's not defined by a
3307 // G_CONSTANT.
3308 unsigned IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00003309 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3310 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003311 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00003312 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003313
3314 // Perform the lane insert.
3315 unsigned SrcReg = I.getOperand(1).getReg();
3316 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3317 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003318
3319 if (VecSize < 128) {
3320 // If the vector we're inserting into is smaller than 128 bits, widen it
3321 // to 128 to do the insert.
3322 MachineInstr *ScalarToVec = emitScalarToVector(
3323 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3324 if (!ScalarToVec)
3325 return false;
3326 SrcReg = ScalarToVec->getOperand(0).getReg();
3327 }
3328
3329 // Create an insert into a new FPR128 register.
3330 // Note that if our vector is already 128 bits, we end up emitting an extra
3331 // register.
3332 MachineInstr *InsMI =
3333 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3334
3335 if (VecSize < 128) {
3336 // If we had to widen to perform the insert, then we have to demote back to
3337 // the original size to get the result we want.
3338 unsigned DemoteVec = InsMI->getOperand(0).getReg();
3339 const TargetRegisterClass *RC =
3340 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3341 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3342 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3343 return false;
3344 }
3345 unsigned SubReg = 0;
3346 if (!getSubRegForClass(RC, TRI, SubReg))
3347 return false;
3348 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3349 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3350                        << ")\n");
3351 return false;
3352 }
3353 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3354 .addReg(DemoteVec, 0, SubReg);
3355 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3356 } else {
3357 // No widening needed.
3358 InsMI->getOperand(0).setReg(DstReg);
3359 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3360 }
3361
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003362 I.eraseFromParent();
3363 return true;
3364}
3365
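/// Select a G_BUILD_VECTOR by moving the first element into a vector register
/// and then inserting each remaining element into its lane.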
Amara Emerson5ec14602018-12-10 18:44:58 +00003366bool AArch64InstructionSelector::selectBuildVector(
3367 MachineInstr &I, MachineRegisterInfo &MRI) const {
3368 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3369 // Until we port more of the optimized selections, for now just use a vector
3370 // insert sequence.
3371 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3372 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3373 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003374 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003375 return false; // Don't support all element types yet.
3376 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003377 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003378
3379 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003380 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003381 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3382 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003383 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003384 return false;
3385
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003386 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003387 unsigned DstSize = DstTy.getSizeInBits();
3388
3389 // Keep track of the last MI we inserted. Later on, we might be able to save
3390 // a copy using it.
3391 MachineInstr *PrevMI = nullptr;
3392 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003393 // Note that if we don't do a subregister copy, we can end up making an
3394 // extra register.
3395 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3396 MIRBuilder);
3397 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003398 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003399
3400 // If DstTy's size in bits is less than 128, then emit a subregister copy
3401 // from DstVec to the last register we've defined.
3402 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003403 // Force this to be FPR using the destination vector.
3404 const TargetRegisterClass *RC =
3405 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003406 if (!RC)
3407 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003408 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3409 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3410 return false;
3411 }
3412
3413 unsigned SubReg = 0;
3414 if (!getSubRegForClass(RC, TRI, SubReg))
3415 return false;
3416 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3417 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3418                        << ")\n");
3419 return false;
3420 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003421
3422 unsigned Reg = MRI.createVirtualRegister(RC);
3423 unsigned DstReg = I.getOperand(0).getReg();
3424
Amara Emerson86271782019-03-18 19:20:10 +00003425 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3426 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003427 MachineOperand &RegOp = I.getOperand(1);
3428 RegOp.setReg(Reg);
3429 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3430 } else {
3431 // We don't need a subregister copy. Save a copy by re-using the
3432 // destination register on the final insert.
3433 assert(PrevMI && "PrevMI was null?");
3434 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3435 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3436 }
3437
Amara Emerson5ec14602018-12-10 18:44:58 +00003438 I.eraseFromParent();
3439 return true;
3440}
3441
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003442/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3443/// ID if it exists, and 0 otherwise.
3444static unsigned findIntrinsicID(MachineInstr &I) {
3445 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3446 return Op.isIntrinsicID();
3447 });
3448 if (IntrinOp == I.operands_end())
3449 return 0;
3450 return IntrinOp->getIntrinsicID();
3451}
3452
Jessica Paquette22c62152019-04-02 19:57:26 +00003453/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3454/// intrinsic.
3455static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3456 switch (NumBytesToStore) {
3457 // TODO: 1, 2, and 4 byte stores.
3458 case 8:
3459 return AArch64::STLXRX;
3460 default:
3461 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3462 << NumBytesToStore << ")\n");
3463 break;
3464 }
3465 return 0;
3466}
3467
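/// Select a G_INTRINSIC_W_SIDE_EFFECTS. Currently handles llvm.trap and
/// 64-bit llvm.aarch64.stlxr.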
3468bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3469 MachineInstr &I, MachineRegisterInfo &MRI) const {
3470 // Find the intrinsic ID.
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003471 unsigned IntrinID = findIntrinsicID(I);
3472 if (!IntrinID)
Jessica Paquette22c62152019-04-02 19:57:26 +00003473 return false;
Jessica Paquette22c62152019-04-02 19:57:26 +00003474 MachineIRBuilder MIRBuilder(I);
3475
3476 // Select the instruction.
3477 switch (IntrinID) {
3478 default:
3479 return false;
3480 case Intrinsic::trap:
3481 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3482 break;
3483 case Intrinsic::aarch64_stlxr:
3484 unsigned StatReg = I.getOperand(0).getReg();
3485 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3486 "Status register must be 32 bits!");
3487 unsigned SrcReg = I.getOperand(2).getReg();
3488
3489 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3490 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3491 return false;
3492 }
3493
3494 unsigned PtrReg = I.getOperand(3).getReg();
3495 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3496
3497 // Expect only one memory operand.
3498 if (!I.hasOneMemOperand())
3499 return false;
3500
3501 const MachineMemOperand *MemOp = *I.memoperands_begin();
3502 unsigned NumBytesToStore = MemOp->getSize();
3503 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3504 if (!Opc)
3505 return false;
3506
3507 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3508 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3509 }
3510
3511 I.eraseFromParent();
3512 return true;
3513}
3514
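/// Select a G_INTRINSIC. Currently only handles llvm.aarch64.crypto.sha1h.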
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003515bool AArch64InstructionSelector::selectIntrinsic(
3516 MachineInstr &I, MachineRegisterInfo &MRI) const {
3517 unsigned IntrinID = findIntrinsicID(I);
3518 if (!IntrinID)
3519 return false;
3520 MachineIRBuilder MIRBuilder(I);
3521
3522 switch (IntrinID) {
3523 default:
3524 break;
3525 case Intrinsic::aarch64_crypto_sha1h:
3526 unsigned DstReg = I.getOperand(0).getReg();
3527 unsigned SrcReg = I.getOperand(2).getReg();
3528
3529 // FIXME: Should this be an assert?
3530 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3531 MRI.getType(SrcReg).getSizeInBits() != 32)
3532 return false;
3533
3534 // The operation has to happen on FPRs. Set up some new FPR registers for
3535 // the source and destination if they are on GPRs.
3536 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3537 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3538 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3539
3540 // Make sure the copy ends up getting constrained properly.
3541 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3542 AArch64::GPR32RegClass, MRI);
3543 }
3544
3545 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3546 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3547
3548 // Actually insert the instruction.
3549 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3550 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3551
3552 // Did we create a new register for the destination?
3553 if (DstReg != I.getOperand(0).getReg()) {
3554 // Yep. Copy the result of the instruction back into the original
3555 // destination.
3556 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3557 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3558 AArch64::GPR32RegClass, MRI);
3559 }
3560
3561 I.eraseFromParent();
3562 return true;
3563 }
3564 return false;
3565}
3566
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003567/// SelectArithImmed - Select an immediate value that can be represented as
3568/// a 12-bit value shifted left by either 0 or 12. If so, return true with
3569/// Val set to the 12-bit value and Shift set to the shifter operand.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003570InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00003571AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003572 MachineInstr &MI = *Root.getParent();
3573 MachineBasicBlock &MBB = *MI.getParent();
3574 MachineFunction &MF = *MBB.getParent();
3575 MachineRegisterInfo &MRI = MF.getRegInfo();
3576
3577 // This function is called from the addsub_shifted_imm ComplexPattern,
3578 // which lists [imm] as the list of opcode it's interested in, however
3579 // we still need to check whether the operand is actually an immediate
3580 // here because the ComplexPattern opcode list is only used in
3581 // root-level opcode matching.
3582 uint64_t Immed;
3583 if (Root.isImm())
3584 Immed = Root.getImm();
3585 else if (Root.isCImm())
3586 Immed = Root.getCImm()->getZExtValue();
3587 else if (Root.isReg()) {
3588 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3589 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003590 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003591 MachineOperand &Op1 = Def->getOperand(1);
3592 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003593 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003594 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003595 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003596 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003597
3598 unsigned ShiftAmt;
3599
3600 if (Immed >> 12 == 0) {
3601 ShiftAmt = 0;
3602 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3603 ShiftAmt = 12;
3604 Immed = Immed >> 12;
3605 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003606 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003607
3608 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003609 return {{
3610 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3611 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3612 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003613}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003614
Daniel Sandersea8711b2017-10-16 03:36:29 +00003615/// Select a "register plus unscaled signed 9-bit immediate" address. This
3616/// should only match when there is an offset that is not valid for a scaled
3617/// immediate addressing mode. The "Size" argument is the size in bytes of the
3618/// memory reference, which is needed here to know what is valid for a scaled
3619/// immediate.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003620InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003621AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3622 unsigned Size) const {
3623 MachineRegisterInfo &MRI =
3624 Root.getParent()->getParent()->getParent()->getRegInfo();
3625
3626 if (!Root.isReg())
3627 return None;
3628
3629 if (!isBaseWithConstantOffset(Root, MRI))
3630 return None;
3631
3632 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3633 if (!RootDef)
3634 return None;
3635
3636 MachineOperand &OffImm = RootDef->getOperand(2);
3637 if (!OffImm.isReg())
3638 return None;
3639 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3640 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3641 return None;
3642 int64_t RHSC;
3643 MachineOperand &RHSOp1 = RHS->getOperand(1);
3644 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3645 return None;
3646 RHSC = RHSOp1.getCImm()->getSExtValue();
3647
3648 // If the offset is valid as a scaled immediate, don't match here.
3649 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3650 return None;
3651 if (RHSC >= -256 && RHSC < 256) {
3652 MachineOperand &Base = RootDef->getOperand(1);
3653 return {{
3654 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3655 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3656 }};
3657 }
3658 return None;
3659}
3660
3661/// Select a "register plus scaled unsigned 12-bit immediate" address. The
3662/// "Size" argument is the size in bytes of the memory reference, which
3663/// determines the scale.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003664InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003665AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3666 unsigned Size) const {
3667 MachineRegisterInfo &MRI =
3668 Root.getParent()->getParent()->getParent()->getRegInfo();
3669
3670 if (!Root.isReg())
3671 return None;
3672
3673 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3674 if (!RootDef)
3675 return None;
3676
3677 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3678 return {{
3679 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3680 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3681 }};
3682 }
3683
3684 if (isBaseWithConstantOffset(Root, MRI)) {
3685 MachineOperand &LHS = RootDef->getOperand(1);
3686 MachineOperand &RHS = RootDef->getOperand(2);
3687 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3688 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3689 if (LHSDef && RHSDef) {
3690 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3691 unsigned Scale = Log2_32(Size);
3692 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3693 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00003694 return {{
3695 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3696 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3697 }};
3698
Daniel Sandersea8711b2017-10-16 03:36:29 +00003699 return {{
3700 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3701 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3702 }};
3703 }
3704 }
3705 }
3706
3707 // Before falling back to our general case, check if the unscaled
3708 // instructions can handle this. If so, that's preferable.
3709 if (selectAddrModeUnscaled(Root, Size).hasValue())
3710 return None;
3711
3712 return {{
3713 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3714 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3715 }};
3716}
3717
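/// Custom renderer for G_CONSTANT operands: adds the constant's value as an
/// immediate operand on the instruction being built.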
Volkan Kelesf7f25682018-01-16 18:44:05 +00003718void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3719 const MachineInstr &MI) const {
3720 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3721 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3722 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3723 assert(CstVal && "Expected constant value");
3724 MIB.addImm(CstVal.getValue());
3725}
3726
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003727namespace llvm {
3728InstructionSelector *
3729createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3730 AArch64Subtarget &Subtarget,
3731 AArch64RegisterBankInfo &RBI) {
3732 return new AArch64InstructionSelector(TM, Subtarget, RBI);
3733}
3734}