//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function that checks whether a register's defining instruction has
  // a given opcode and, if so, returns that instruction.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Can generic operations have non-register operands? If so, this
    // needs to be supported.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the sizes of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, so the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around; as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction, but the shift-left
  // register instruction takes a signed shift amount, where negative values
  // specify a right shift.
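  // For example, an arithmetic right shift of each lane by 3 is emitted below
  // as a NEG of the shift-amount vector followed by SSHL, since SSHL shifts
  // right for lanes whose shift amount is negative.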

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    unsigned DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    I.setDesc(TII.get(MovOpc));

    if (isFP) {
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

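    // The UBFM immediates are the extract offset (operand 2, which becomes
    // immr) and offset + width - 1 (imms); e.g. extracting 16 bits starting at
    // bit 32 of a 64-bit source becomes UBFMXri with immr = 32 and imms = 47.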
    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);
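    // This is the BFI form of BFM: immr = (DstSize - LSB) % DstSize and
    // imms = Width - 1; e.g. inserting a 16-bit value at bit 8 of a 32-bit
    // register becomes BFMWri with immr = 24 and imms = 15.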
1259
Amara Emersonbc03bae2018-02-18 17:03:02 +00001260 if (DstSize < 64) {
1261 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1262 "unexpected G_INSERT types");
1263 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1264 }
1265
Tim Northover7b6d66c2017-07-20 22:58:38 +00001266 unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1267 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1268 TII.get(AArch64::SUBREG_TO_REG))
1269 .addDef(SrcReg)
1270 .addImm(0)
1271 .addUse(I.getOperand(2).getReg())
1272 .addImm(AArch64::sub_32);
1273 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1274 AArch64::GPR32RegClass, MRI);
1275 I.getOperand(2).setReg(SrcReg);
1276
1277 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1278 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001279 case TargetOpcode::G_FRAME_INDEX: {
1280 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001281 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001282 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1283 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001284 return false;
1285 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001286 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001287
1288 // MOs for a #0 shifted immediate.
1289 I.addOperand(MachineOperand::CreateImm(0));
1290 I.addOperand(MachineOperand::CreateImm(0));
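    // Sketch of the result: the frame index is materialized as
    //   ADDXri %dst, %stack.N, 0, 0
    // i.e. the slot address plus an immediate of #0 with no shift (the
    // %stack.N name here is only illustrative).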
1291
1292 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1293 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001294
1295 case TargetOpcode::G_GLOBAL_VALUE: {
1296 auto GV = I.getOperand(1).getGlobal();
1297 if (GV->isThreadLocal()) {
1298 // FIXME: we don't support TLS yet.
1299 return false;
1300 }
1301 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001302 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001303 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001304 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001305 } else if (TM.getCodeModel() == CodeModel::Large) {
1306 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001307 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001308 I.eraseFromParent();
1309 return true;
David Green9dd1d452018-08-22 11:31:39 +00001310 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1311 I.setDesc(TII.get(AArch64::ADR));
1312 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001313 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001314 I.setDesc(TII.get(AArch64::MOVaddr));
1315 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1316 MachineInstrBuilder MIB(MF, I);
1317 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1318 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
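      // The MOVaddr pseudo built here carries the global twice, once with
      // MO_PAGE and once with MO_PAGEOFF | MO_NC; it is later expanded into
      // the usual ADRP + ADD :lo12: pair.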
1319 }
1320 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1321 }
1322
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001323 case TargetOpcode::G_LOAD:
1324 case TargetOpcode::G_STORE: {
Tim Northover0f140c72016-09-09 11:46:34 +00001325 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001326
Tim Northover5ae83502016-09-15 09:20:34 +00001327 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001328 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1329 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001330 return false;
1331 }
1332
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001333 auto &MemOp = **I.memoperands_begin();
1334 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001335 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001336 return false;
1337 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001338 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001339
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001340 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001341#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001342 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001343 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001344 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1345 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001346 assert(MRI.getType(PtrReg).isPointer() &&
1347 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001348#endif
1349
1350 const unsigned ValReg = I.getOperand(0).getReg();
1351 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1352
1353 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001354 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001355 if (NewOpc == I.getOpcode())
1356 return false;
1357
1358 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001359
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001360 uint64_t Offset = 0;
1361 auto *PtrMI = MRI.getVRegDef(PtrReg);
1362
1363 // Try to fold a GEP into our unsigned immediate addressing mode.
1364 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1365 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1366 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001367 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001368 const unsigned Scale = Log2_32(Size);
1369 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1370 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1371 I.getOperand(1).setReg(Ptr2Reg);
1372 PtrMI = MRI.getVRegDef(Ptr2Reg);
1373 Offset = Imm / Size;
1374 }
1375 }
1376 }
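    // Worked example under an assumed input: for an s64 load whose address
    // is G_GEP %base, 16, Size is 8 and Scale is 3, so Imm = 16 passes the
    // checks and Offset becomes 2; the resulting LDRXui then addresses
    // %base + 2 * 8.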
1377
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001378 // If we haven't folded anything into our addressing mode yet, try to fold
1379 // a frame index into the base+offset.
1380 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1381 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1382
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001383 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001384
1385 // If we're storing a 0, use WZR/XZR.
1386 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1387 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1388 if (I.getOpcode() == AArch64::STRWui)
1389 I.getOperand(0).setReg(AArch64::WZR);
1390 else if (I.getOpcode() == AArch64::STRXui)
1391 I.getOperand(0).setReg(AArch64::XZR);
1392 }
1393 }
1394
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001395 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1396 }
1397
Tim Northover9dd78f82017-02-08 21:22:25 +00001398 case TargetOpcode::G_SMULH:
1399 case TargetOpcode::G_UMULH: {
1400 // Reject the various things we don't support yet.
1401 if (unsupportedBinOp(I, RBI, MRI, TRI))
1402 return false;
1403
1404 const unsigned DefReg = I.getOperand(0).getReg();
1405 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1406
1407 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001408 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001409 return false;
1410 }
1411
1412 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001413 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1414 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001415 return false;
1416 }
1417
1418 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1419 : AArch64::UMULHrr;
1420 I.setDesc(TII.get(NewOpc));
1421
1422 // Now that we selected an opcode, we need to constrain the register
1423 // operands to use appropriate classes.
1424 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1425 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001426 case TargetOpcode::G_FADD:
1427 case TargetOpcode::G_FSUB:
1428 case TargetOpcode::G_FMUL:
1429 case TargetOpcode::G_FDIV:
1430
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001431 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001432 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1433 return selectVectorASHR(I, MRI);
1434 LLVM_FALLTHROUGH;
1435 case TargetOpcode::G_SHL:
1436 if (Opcode == TargetOpcode::G_SHL &&
1437 MRI.getType(I.getOperand(0).getReg()).isVector())
1438 return selectVectorSHL(I, MRI);
1439 LLVM_FALLTHROUGH;
1440 case TargetOpcode::G_OR:
1441 case TargetOpcode::G_LSHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001442 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001443 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001444 if (unsupportedBinOp(I, RBI, MRI, TRI))
1445 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001446
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001447 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001448
1449 const unsigned DefReg = I.getOperand(0).getReg();
1450 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1451
1452 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1453 if (NewOpc == I.getOpcode())
1454 return false;
1455
1456 I.setDesc(TII.get(NewOpc));
1457     // FIXME: Should the type always be reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001458
1459 // Now that we selected an opcode, we need to constrain the register
1460 // operands to use appropriate classes.
1461 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1462 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001463
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001464 case TargetOpcode::G_UADDO: {
1465 // TODO: Support other types.
1466 unsigned OpSize = Ty.getSizeInBits();
1467 if (OpSize != 32 && OpSize != 64) {
1468 LLVM_DEBUG(
1469 dbgs()
1470 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1471 return false;
1472 }
1473
1474 // TODO: Support vectors.
1475 if (Ty.isVector()) {
1476 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1477 return false;
1478 }
1479
1480 // Add and set the set condition flag.
1481 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1482 MachineIRBuilder MIRBuilder(I);
1483 auto AddsMI = MIRBuilder.buildInstr(
1484 AddsOpc, {I.getOperand(0).getReg()},
1485 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1486 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1487
1488 // Now, put the overflow result in the register given by the first operand
1489 // to the G_UADDO. CSINC increments the result when the predicate is false,
1490 // so to get the increment when it's true, we need to use the inverse. In
1491 // this case, we want to increment when carry is set.
1492 auto CsetMI = MIRBuilder
1493 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1494 {AArch64::WZR, AArch64::WZR})
1495 .addImm(getInvertedCondCode(AArch64CC::HS));
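    // This is the usual CSET idiom: CSINCWr %carry, WZR, WZR, LO (the
    // inverse of HS) produces 1 exactly when the ADDS above set the carry
    // flag, and 0 otherwise.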
1496 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1497 I.eraseFromParent();
1498 return true;
1499 }
1500
Tim Northover398c5f52017-02-14 20:56:29 +00001501 case TargetOpcode::G_PTR_MASK: {
1502 uint64_t Align = I.getOperand(2).getImm();
1503 if (Align >= 64 || Align == 0)
1504 return false;
1505
1506 uint64_t Mask = ~((1ULL << Align) - 1);
1507 I.setDesc(TII.get(AArch64::ANDXri));
1508 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
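    // Example, assuming Align == 4: Mask is 0xFFFFFFFFFFFFFFF0, which is a
    // valid 64-bit logical immediate, so the ANDXri clears the low four bits
    // of the pointer.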
1509
1510 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1511 }
Tim Northover037af52c2016-10-31 18:31:09 +00001512 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001513 case TargetOpcode::G_TRUNC: {
1514 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1515 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1516
1517 const unsigned DstReg = I.getOperand(0).getReg();
1518 const unsigned SrcReg = I.getOperand(1).getReg();
1519
1520 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1521 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1522
1523 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001524 LLVM_DEBUG(
1525 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001526 return false;
1527 }
1528
1529 if (DstRB.getID() == AArch64::GPRRegBankID) {
1530 const TargetRegisterClass *DstRC =
1531 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1532 if (!DstRC)
1533 return false;
1534
1535 const TargetRegisterClass *SrcRC =
1536 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1537 if (!SrcRC)
1538 return false;
1539
1540 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1541 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001542 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001543 return false;
1544 }
1545
1546 if (DstRC == SrcRC) {
1547 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001548 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1549 SrcTy == LLT::scalar(64)) {
1550 llvm_unreachable("TableGen can import this case");
1551 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001552 } else if (DstRC == &AArch64::GPR32RegClass &&
1553 SrcRC == &AArch64::GPR64RegClass) {
1554 I.getOperand(1).setSubReg(AArch64::sub_32);
1555 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001556 LLVM_DEBUG(
1557 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001558 return false;
1559 }
1560
1561 I.setDesc(TII.get(TargetOpcode::COPY));
1562 return true;
1563 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1564 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1565 I.setDesc(TII.get(AArch64::XTNv4i16));
1566 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1567 return true;
1568 }
1569 }
1570
1571 return false;
1572 }
1573
Tim Northover3d38b3a2016-10-11 20:50:21 +00001574 case TargetOpcode::G_ANYEXT: {
1575 const unsigned DstReg = I.getOperand(0).getReg();
1576 const unsigned SrcReg = I.getOperand(1).getReg();
1577
Quentin Colombetcb629a82016-10-12 03:57:49 +00001578 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1579 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001580 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1581 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001582 return false;
1583 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001584
Quentin Colombetcb629a82016-10-12 03:57:49 +00001585 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1586 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001587 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1588 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001589 return false;
1590 }
1591
1592 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1593
1594 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001595 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001596 return false;
1597 }
1598
Quentin Colombetcb629a82016-10-12 03:57:49 +00001599 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001600 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1601 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001602 return false;
1603 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001604 // At this point G_ANYEXT is just like a plain COPY, but we need
1605    // to explicitly form the 64-bit value when widening to 64 bits.
1606 if (DstSize > 32) {
1607 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1608 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1609 .addDef(ExtSrc)
1610 .addImm(0)
1611 .addUse(SrcReg)
1612 .addImm(AArch64::sub_32);
1613 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001614 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001615 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001616 }
1617
1618 case TargetOpcode::G_ZEXT:
1619 case TargetOpcode::G_SEXT: {
1620 unsigned Opcode = I.getOpcode();
1621 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1622 SrcTy = MRI.getType(I.getOperand(1).getReg());
1623 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1624 const unsigned DefReg = I.getOperand(0).getReg();
1625 const unsigned SrcReg = I.getOperand(1).getReg();
1626 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1627
1628 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001629 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1630 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001631 return false;
1632 }
1633
1634 MachineInstr *ExtI;
1635 if (DstTy == LLT::scalar(64)) {
1636 // FIXME: Can we avoid manually doing this?
1637 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001638 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1639 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001640 return false;
1641 }
1642
1643 const unsigned SrcXReg =
1644 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1645 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1646 .addDef(SrcXReg)
1647 .addImm(0)
1648 .addUse(SrcReg)
1649 .addImm(AArch64::sub_32);
1650
1651 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1652 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1653 .addDef(DefReg)
1654 .addUse(SrcXReg)
1655 .addImm(0)
1656 .addImm(SrcTy.getSizeInBits() - 1);
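      // Illustrative sketch: a G_SEXT from s8 to s64 becomes SUBREG_TO_REG
      // into an X register followed by SBFMXri %dst, %srcX, 0, 7, i.e. the
      // SXTB alias.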
Tim Northovera9105be2016-11-09 22:39:54 +00001657 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001658 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1659 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1660 .addDef(DefReg)
1661 .addUse(SrcReg)
1662 .addImm(0)
1663 .addImm(SrcTy.getSizeInBits() - 1);
1664 } else {
1665 return false;
1666 }
1667
1668 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1669
1670 I.eraseFromParent();
1671 return true;
1672 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001673
Tim Northover69271c62016-10-12 22:49:11 +00001674 case TargetOpcode::G_SITOFP:
1675 case TargetOpcode::G_UITOFP:
1676 case TargetOpcode::G_FPTOSI:
1677 case TargetOpcode::G_FPTOUI: {
1678 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1679 SrcTy = MRI.getType(I.getOperand(1).getReg());
1680 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1681 if (NewOpc == Opcode)
1682 return false;
1683
1684 I.setDesc(TII.get(NewOpc));
1685 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1686
1687 return true;
1688 }
1689
1690
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001691 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001692 // The importer is currently unable to import pointer types since they
1693 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001694 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001695
Daniel Sandersedd07842017-08-17 09:26:14 +00001696 case TargetOpcode::G_BITCAST:
1697 // Imported SelectionDAG rules can handle every bitcast except those that
1698 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00001699 // but we might not run an optimizer that deletes them. The other exception
1700 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1701 // of them.
1702 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00001703
Tim Northover9ac0eba2016-11-08 00:45:29 +00001704 case TargetOpcode::G_SELECT: {
1705 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001706 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1707 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001708 return false;
1709 }
1710
1711 const unsigned CondReg = I.getOperand(1).getReg();
1712 const unsigned TReg = I.getOperand(2).getReg();
1713 const unsigned FReg = I.getOperand(3).getReg();
1714
1715 unsigned CSelOpc = 0;
1716
1717 if (Ty == LLT::scalar(32)) {
1718 CSelOpc = AArch64::CSELWr;
Kristof Beylse9412b42017-01-19 13:32:14 +00001719 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
Tim Northover9ac0eba2016-11-08 00:45:29 +00001720 CSelOpc = AArch64::CSELXr;
1721 } else {
1722 return false;
1723 }
1724
1725 MachineInstr &TstMI =
1726 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1727 .addDef(AArch64::WZR)
1728 .addUse(CondReg)
1729 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1730
1731 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1732 .addDef(I.getOperand(0).getReg())
1733 .addUse(TReg)
1734 .addUse(FReg)
1735 .addImm(AArch64CC::NE);
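    // Sketch of the expansion: the ANDS built above tests the low bit of the
    // s1 condition (ANDS WZR, %cond, #1), and the CSEL picks TReg under NE
    // (bit set) and FReg otherwise.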
1736
1737 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1738 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1739
1740 I.eraseFromParent();
1741 return true;
1742 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001743 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00001744 if (Ty.isVector())
1745 return selectVectorICmp(I, MRI);
1746
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001747 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001748 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1749 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001750 return false;
1751 }
1752
1753 unsigned CmpOpc = 0;
1754 unsigned ZReg = 0;
1755
1756 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1757 if (CmpTy == LLT::scalar(32)) {
1758 CmpOpc = AArch64::SUBSWrr;
1759 ZReg = AArch64::WZR;
1760 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1761 CmpOpc = AArch64::SUBSXrr;
1762 ZReg = AArch64::XZR;
1763 } else {
1764 return false;
1765 }
1766
Kristof Beyls22524402017-01-05 10:16:08 +00001767 // CSINC increments the result by one when the condition code is false.
1768 // Therefore, we have to invert the predicate to get an increment by 1 when
1769 // the predicate is true.
1770 const AArch64CC::CondCode invCC =
1771 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1772 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
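    // Example: for an equality compare the inverse predicate is ne, so the
    // CSINC below uses AArch64CC::NE and therefore yields 1 exactly when the
    // SUBS leaves the Z flag set, i.e. the standard CSET EQ idiom.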
Tim Northover6c02ad52016-10-12 22:49:04 +00001773
1774 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1775 .addDef(ZReg)
1776 .addUse(I.getOperand(2).getReg())
1777 .addUse(I.getOperand(3).getReg());
1778
1779 MachineInstr &CSetMI =
1780 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1781 .addDef(I.getOperand(0).getReg())
1782 .addUse(AArch64::WZR)
1783 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001784 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001785
1786 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1787 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1788
1789 I.eraseFromParent();
1790 return true;
1791 }
1792
Tim Northover7dd378d2016-10-12 22:49:07 +00001793 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001794 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001795 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1796 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001797 return false;
1798 }
1799
1800 unsigned CmpOpc = 0;
1801 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1802 if (CmpTy == LLT::scalar(32)) {
1803 CmpOpc = AArch64::FCMPSrr;
1804 } else if (CmpTy == LLT::scalar(64)) {
1805 CmpOpc = AArch64::FCMPDrr;
1806 } else {
1807 return false;
1808 }
1809
1810 // FIXME: regbank
1811
1812 AArch64CC::CondCode CC1, CC2;
1813 changeFCMPPredToAArch64CC(
1814 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
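    // Some FP predicates need two condition codes; for example an ordered
    // not-equal compare is typically expressed as MI or GT (illustrative, the
    // exact mapping comes from changeFCMPPredToAArch64CC), which is why a
    // second CSINC and an ORR may be emitted below.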
1815
1816 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1817 .addUse(I.getOperand(2).getReg())
1818 .addUse(I.getOperand(3).getReg());
1819
1820 const unsigned DefReg = I.getOperand(0).getReg();
1821 unsigned Def1Reg = DefReg;
1822 if (CC2 != AArch64CC::AL)
1823 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1824
1825 MachineInstr &CSetMI =
1826 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1827 .addDef(Def1Reg)
1828 .addUse(AArch64::WZR)
1829 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001830 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001831
1832 if (CC2 != AArch64CC::AL) {
1833 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1834 MachineInstr &CSet2MI =
1835 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1836 .addDef(Def2Reg)
1837 .addUse(AArch64::WZR)
1838 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001839 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001840 MachineInstr &OrMI =
1841 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1842 .addDef(DefReg)
1843 .addUse(Def1Reg)
1844 .addUse(Def2Reg);
1845 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1846 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1847 }
1848
1849 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1850 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1851
1852 I.eraseFromParent();
1853 return true;
1854 }
Tim Northovere9600d82017-02-08 17:57:27 +00001855 case TargetOpcode::G_VASTART:
1856 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1857 : selectVaStartAAPCS(I, MF, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001858 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001859 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001860 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001861 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001862 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1863 const unsigned DstReg = I.getOperand(0).getReg();
1864 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1865 const TargetRegisterClass *DstRC =
1866 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1867 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001868 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001869 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001870 case TargetOpcode::G_BLOCK_ADDR: {
1871 if (TM.getCodeModel() == CodeModel::Large) {
1872 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1873 I.eraseFromParent();
1874 return true;
1875 } else {
1876 I.setDesc(TII.get(AArch64::MOVaddrBA));
1877 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1878 I.getOperand(0).getReg())
1879 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1880 /* Offset */ 0, AArch64II::MO_PAGE)
1881 .addBlockAddress(
1882 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1883 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1884 I.eraseFromParent();
1885 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1886 }
1887 }
Jessica Paquette991cb392019-04-23 20:46:19 +00001888 case TargetOpcode::G_INTRINSIC_TRUNC:
1889 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00001890 case TargetOpcode::G_INTRINSIC_ROUND:
1891 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00001892 case TargetOpcode::G_BUILD_VECTOR:
1893 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001894 case TargetOpcode::G_MERGE_VALUES:
1895 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001896 case TargetOpcode::G_UNMERGE_VALUES:
1897 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001898 case TargetOpcode::G_SHUFFLE_VECTOR:
1899 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001900 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1901 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001902 case TargetOpcode::G_INSERT_VECTOR_ELT:
1903 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00001904 case TargetOpcode::G_CONCAT_VECTORS:
1905 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001906 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001907
1908 return false;
1909}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00001910
Jessica Paquette991cb392019-04-23 20:46:19 +00001911bool AArch64InstructionSelector::selectIntrinsicTrunc(
1912 MachineInstr &I, MachineRegisterInfo &MRI) const {
1913 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
1914
1915 // Select the correct opcode.
1916 unsigned Opc = 0;
1917 if (!SrcTy.isVector()) {
1918 switch (SrcTy.getSizeInBits()) {
1919 default:
1920 case 16:
1921 Opc = AArch64::FRINTZHr;
1922 break;
1923 case 32:
1924 Opc = AArch64::FRINTZSr;
1925 break;
1926 case 64:
1927 Opc = AArch64::FRINTZDr;
1928 break;
1929 }
1930 } else {
1931 unsigned NumElts = SrcTy.getNumElements();
1932 switch (SrcTy.getElementType().getSizeInBits()) {
1933 default:
1934 break;
1935 case 16:
1936 if (NumElts == 4)
1937 Opc = AArch64::FRINTZv4f16;
1938 else if (NumElts == 8)
1939 Opc = AArch64::FRINTZv8f16;
1940 break;
1941 case 32:
1942 if (NumElts == 2)
1943 Opc = AArch64::FRINTZv2f32;
1944 else if (NumElts == 4)
1945 Opc = AArch64::FRINTZv4f32;
1946 break;
1947 case 64:
1948 if (NumElts == 2)
1949 Opc = AArch64::FRINTZv2f64;
1950 break;
1951 }
1952 }
1953
1954 if (!Opc) {
1955 // Didn't get an opcode above, bail.
1956 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
1957 return false;
1958 }
1959
1960 // Legalization would have set us up perfectly for this; we just need to
1961 // set the opcode and move on.
1962 I.setDesc(TII.get(Opc));
1963 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1964}
1965
Jessica Paquette4fe75742019-04-23 23:03:03 +00001966bool AArch64InstructionSelector::selectIntrinsicRound(
1967 MachineInstr &I, MachineRegisterInfo &MRI) const {
1968 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
1969
1970 // Select the correct opcode.
1971 unsigned Opc = 0;
1972 if (!SrcTy.isVector()) {
1973 switch (SrcTy.getSizeInBits()) {
1974 default:
1975 case 16:
1976 Opc = AArch64::FRINTAHr;
1977 break;
1978 case 32:
1979 Opc = AArch64::FRINTASr;
1980 break;
1981 case 64:
1982 Opc = AArch64::FRINTADr;
1983 break;
1984 }
1985 } else {
1986 unsigned NumElts = SrcTy.getNumElements();
1987 switch (SrcTy.getElementType().getSizeInBits()) {
1988 default:
1989 break;
1990 case 16:
1991 if (NumElts == 4)
1992 Opc = AArch64::FRINTAv4f16;
1993 else if (NumElts == 8)
1994 Opc = AArch64::FRINTAv8f16;
1995 break;
1996 case 32:
1997 if (NumElts == 2)
1998 Opc = AArch64::FRINTAv2f32;
1999 else if (NumElts == 4)
2000 Opc = AArch64::FRINTAv4f32;
2001 break;
2002 case 64:
2003 if (NumElts == 2)
2004 Opc = AArch64::FRINTAv2f64;
2005 break;
2006 }
2007 }
2008
2009 if (!Opc) {
2010 // Didn't get an opcode above, bail.
2011 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2012 return false;
2013 }
2014
2015 // Legalization would have set us up perfectly for this; we just need to
2016 // set the opcode and move on.
2017 I.setDesc(TII.get(Opc));
2018 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2019}
2020
Amara Emerson9bf092d2019-04-09 21:22:43 +00002021bool AArch64InstructionSelector::selectVectorICmp(
2022 MachineInstr &I, MachineRegisterInfo &MRI) const {
2023 unsigned DstReg = I.getOperand(0).getReg();
2024 LLT DstTy = MRI.getType(DstReg);
2025 unsigned SrcReg = I.getOperand(2).getReg();
2026 unsigned Src2Reg = I.getOperand(3).getReg();
2027 LLT SrcTy = MRI.getType(SrcReg);
2028
2029 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2030 unsigned NumElts = DstTy.getNumElements();
2031
2032 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2033 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2034 // Third index is cc opcode:
2035 // 0 == eq
2036 // 1 == ugt
2037 // 2 == uge
2038 // 3 == ult
2039 // 4 == ule
2040 // 5 == sgt
2041 // 6 == sge
2042 // 7 == slt
2043 // 8 == sle
2044 // ne is done by negating 'eq' result.
2045
2046  // The table below assumes that for some comparisons the operands will be
2047 // commuted.
2048 // ult op == commute + ugt op
2049 // ule op == commute + uge op
2050 // slt op == commute + sgt op
2051 // sle op == commute + sge op
2052 unsigned PredIdx = 0;
2053 bool SwapOperands = false;
2054 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2055 switch (Pred) {
2056 case CmpInst::ICMP_NE:
2057 case CmpInst::ICMP_EQ:
2058 PredIdx = 0;
2059 break;
2060 case CmpInst::ICMP_UGT:
2061 PredIdx = 1;
2062 break;
2063 case CmpInst::ICMP_UGE:
2064 PredIdx = 2;
2065 break;
2066 case CmpInst::ICMP_ULT:
2067 PredIdx = 3;
2068 SwapOperands = true;
2069 break;
2070 case CmpInst::ICMP_ULE:
2071 PredIdx = 4;
2072 SwapOperands = true;
2073 break;
2074 case CmpInst::ICMP_SGT:
2075 PredIdx = 5;
2076 break;
2077 case CmpInst::ICMP_SGE:
2078 PredIdx = 6;
2079 break;
2080 case CmpInst::ICMP_SLT:
2081 PredIdx = 7;
2082 SwapOperands = true;
2083 break;
2084 case CmpInst::ICMP_SLE:
2085 PredIdx = 8;
2086 SwapOperands = true;
2087 break;
2088 default:
2089 llvm_unreachable("Unhandled icmp predicate");
2090 return false;
2091 }
2092
2093 // This table obviously should be tablegen'd when we have our GISel native
2094 // tablegen selector.
2095
2096 static const unsigned OpcTable[4][4][9] = {
2097 {
2098 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2099 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2100 0 /* invalid */},
2101 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2102 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2103 0 /* invalid */},
2104 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2105 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2106 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2107 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2108 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2109 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2110 },
2111 {
2112 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2113 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2114 0 /* invalid */},
2115 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2116 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2117 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2118 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2119 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2120 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2121 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2122 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2123 0 /* invalid */}
2124 },
2125 {
2126 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2127 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2128 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2129 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2130 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2131 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2132 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2133 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2134 0 /* invalid */},
2135 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2136 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2137 0 /* invalid */}
2138 },
2139 {
2140 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2141 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2142 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2143 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2144 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2145 0 /* invalid */},
2146 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2147 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2148 0 /* invalid */},
2149 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2150 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2151 0 /* invalid */}
2152 },
2153 };
2154 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2155 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2156 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
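  // Worked example: a v4s32 signed greater-than compare has
  // EltIdx = Log2_32(32 / 8) = 2, NumEltsIdx = Log2_32(4 / 2) = 1 and
  // PredIdx = 5, which selects AArch64::CMGTv4i32 from the table above.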
2157 if (!Opc) {
2158 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2159 return false;
2160 }
2161
2162 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2163 const TargetRegisterClass *SrcRC =
2164 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2165 if (!SrcRC) {
2166 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2167 return false;
2168 }
2169
2170 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2171 if (SrcTy.getSizeInBits() == 128)
2172 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2173
2174 if (SwapOperands)
2175 std::swap(SrcReg, Src2Reg);
2176
2177 MachineIRBuilder MIB(I);
2178 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2179 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2180
2181 // Invert if we had a 'ne' cc.
2182 if (NotOpc) {
2183 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2184 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2185 } else {
2186 MIB.buildCopy(DstReg, Cmp.getReg(0));
2187 }
2188 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2189 I.eraseFromParent();
2190 return true;
2191}
2192
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002193MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00002194 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002195 MachineIRBuilder &MIRBuilder) const {
2196 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002197
2198 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002199 auto Ins =
2200 MIRBuilder
2201 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2202 .addImm(SubregIndex);
2203 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2204 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2205 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002206 };
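  // A minimal sketch of what BuildFn emits for a 64-bit element with an
  // FPR128 destination class (names illustrative):
  //   %undef:fpr128 = IMPLICIT_DEF
  //   %vec:fpr128   = INSERT_SUBREG %undef, %scalar, %subreg.dsub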
2207
Amara Emerson8acb0d92019-03-04 19:16:00 +00002208 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002209 case 16:
2210 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002211 case 32:
2212 return BuildFn(AArch64::ssub);
2213 case 64:
2214 return BuildFn(AArch64::dsub);
2215 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002216 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002217 }
2218}
2219
Amara Emerson8cb186c2018-12-20 01:11:04 +00002220bool AArch64InstructionSelector::selectMergeValues(
2221 MachineInstr &I, MachineRegisterInfo &MRI) const {
2222 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2223 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2224 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2225 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2226
2227 // At the moment we only support merging two s32s into an s64.
2228 if (I.getNumOperands() != 3)
2229 return false;
2230 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2231 return false;
2232 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2233 if (RB.getID() != AArch64::GPRRegBankID)
2234 return false;
2235
2236 auto *DstRC = &AArch64::GPR64RegClass;
2237 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
2238 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2239 TII.get(TargetOpcode::SUBREG_TO_REG))
2240 .addDef(SubToRegDef)
2241 .addImm(0)
2242 .addUse(I.getOperand(1).getReg())
2243 .addImm(AArch64::sub_32);
2244 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2245 // Need to anyext the second scalar before we can use bfm
2246 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2247 TII.get(TargetOpcode::SUBREG_TO_REG))
2248 .addDef(SubToRegDef2)
2249 .addImm(0)
2250 .addUse(I.getOperand(2).getReg())
2251 .addImm(AArch64::sub_32);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002252 MachineInstr &BFM =
2253 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002254 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002255 .addUse(SubToRegDef)
2256 .addUse(SubToRegDef2)
2257 .addImm(32)
2258 .addImm(31);
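  // Sketch of the merge: the first SUBREG_TO_REG result supplies the low 32
  // bits and the BFMXri above (equivalent to BFI #32, #32) inserts the
  // second one into bits [63:32], producing the s64 result.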
2259 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2260 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2261 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2262 I.eraseFromParent();
2263 return true;
2264}
2265
Jessica Paquette607774c2019-03-11 22:18:01 +00002266static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2267 const unsigned EltSize) {
2268 // Choose a lane copy opcode and subregister based off of the size of the
2269 // vector's elements.
2270 switch (EltSize) {
2271 case 16:
2272 CopyOpc = AArch64::CPYi16;
2273 ExtractSubReg = AArch64::hsub;
2274 break;
2275 case 32:
2276 CopyOpc = AArch64::CPYi32;
2277 ExtractSubReg = AArch64::ssub;
2278 break;
2279 case 64:
2280 CopyOpc = AArch64::CPYi64;
2281 ExtractSubReg = AArch64::dsub;
2282 break;
2283 default:
2284 // Unknown size, bail out.
2285 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2286 return false;
2287 }
2288 return true;
2289}
2290
Amara Emersond61b89b2019-03-14 22:48:18 +00002291MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2292 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2293 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2294 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2295 unsigned CopyOpc = 0;
2296 unsigned ExtractSubReg = 0;
2297 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2298 LLVM_DEBUG(
2299 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2300 return nullptr;
2301 }
2302
2303 const TargetRegisterClass *DstRC =
2304 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2305 if (!DstRC) {
2306 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2307 return nullptr;
2308 }
2309
2310 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2311 const LLT &VecTy = MRI.getType(VecReg);
2312 const TargetRegisterClass *VecRC =
2313 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2314 if (!VecRC) {
2315 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2316 return nullptr;
2317 }
2318
2319 // The register that we're going to copy into.
2320 unsigned InsertReg = VecReg;
2321 if (!DstReg)
2322 DstReg = MRI.createVirtualRegister(DstRC);
2323 // If the lane index is 0, we just use a subregister COPY.
2324 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002325 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2326 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002327 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002328 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002329 }
2330
2331 // Lane copies require 128-bit wide registers. If we're dealing with an
2332 // unpacked vector, then we need to move up to that width. Insert an implicit
2333 // def and a subregister insert to get us there.
2334 if (VecTy.getSizeInBits() != 128) {
2335 MachineInstr *ScalarToVector = emitScalarToVector(
2336 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2337 if (!ScalarToVector)
2338 return nullptr;
2339 InsertReg = ScalarToVector->getOperand(0).getReg();
2340 }
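  // Illustrative example: extracting lane 1 of a v2s32 held in a 64-bit FPR
  // first widens the source to an FPR128 as above, then the CPYi32 built
  // below copies lane 1 out of the widened register.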
2341
2342 MachineInstr *LaneCopyMI =
2343 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2344 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2345
2346 // Make sure that we actually constrain the initial copy.
2347 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2348 return LaneCopyMI;
2349}
2350
Jessica Paquette607774c2019-03-11 22:18:01 +00002351bool AArch64InstructionSelector::selectExtractElt(
2352 MachineInstr &I, MachineRegisterInfo &MRI) const {
2353 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2354 "unexpected opcode!");
2355 unsigned DstReg = I.getOperand(0).getReg();
2356 const LLT NarrowTy = MRI.getType(DstReg);
2357 const unsigned SrcReg = I.getOperand(1).getReg();
2358 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002359 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002360 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2361 "source register size too small!");
2362 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2363
2364 // Need the lane index to determine the correct copy opcode.
2365 MachineOperand &LaneIdxOp = I.getOperand(2);
2366 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2367
2368 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2369 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2370 return false;
2371 }
2372
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002373 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002374 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2375 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002376 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002377 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002378
Jessica Paquette607774c2019-03-11 22:18:01 +00002379 MachineIRBuilder MIRBuilder(I);
2380
Amara Emersond61b89b2019-03-14 22:48:18 +00002381 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2382 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2383 LaneIdx, MIRBuilder);
2384 if (!Extract)
2385 return false;
2386
2387 I.eraseFromParent();
2388 return true;
2389}
2390
2391bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2392 MachineInstr &I, MachineRegisterInfo &MRI) const {
2393 unsigned NumElts = I.getNumOperands() - 1;
2394 unsigned SrcReg = I.getOperand(NumElts).getReg();
2395 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2396 const LLT SrcTy = MRI.getType(SrcReg);
2397
2398 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2399 if (SrcTy.getSizeInBits() > 128) {
2400 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2401 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002402 }
2403
Amara Emersond61b89b2019-03-14 22:48:18 +00002404 MachineIRBuilder MIB(I);
2405
2406 // We implement a split vector operation by treating the sub-vectors as
2407 // scalars and extracting them.
2408 const RegisterBank &DstRB =
2409 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2410 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2411 unsigned Dst = I.getOperand(OpIdx).getReg();
2412 MachineInstr *Extract =
2413 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2414 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002415 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002416 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002417 I.eraseFromParent();
2418 return true;
2419}
2420
Jessica Paquette245047d2019-01-24 22:00:41 +00002421bool AArch64InstructionSelector::selectUnmergeValues(
2422 MachineInstr &I, MachineRegisterInfo &MRI) const {
2423 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2424 "unexpected opcode");
2425
2426 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2427 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2428 AArch64::FPRRegBankID ||
2429 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2430 AArch64::FPRRegBankID) {
2431 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2432 "currently unsupported.\n");
2433 return false;
2434 }
2435
2436 // The last operand is the vector source register, and every other operand is
2437 // a register to unpack into.
2438 unsigned NumElts = I.getNumOperands() - 1;
2439 unsigned SrcReg = I.getOperand(NumElts).getReg();
2440 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2441 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002442 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002443 assert(WideTy.isVector() && "can only unmerge from vector types!");
2444 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2445 "source register size too small!");
2446
Amara Emersond61b89b2019-03-14 22:48:18 +00002447 if (!NarrowTy.isScalar())
2448 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002449
Amara Emerson3739a202019-03-15 21:59:50 +00002450 MachineIRBuilder MIB(I);
2451
Jessica Paquette245047d2019-01-24 22:00:41 +00002452 // Choose a lane copy opcode and subregister based off of the size of the
2453 // vector's elements.
2454 unsigned CopyOpc = 0;
2455 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002456 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002457 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002458
2459 // Set up for the lane copies.
2460 MachineBasicBlock &MBB = *I.getParent();
2461
2462 // Stores the registers we'll be copying from.
2463 SmallVector<unsigned, 4> InsertRegs;
2464
2465 // We'll use the first register twice, so we only need NumElts-1 registers.
2466 unsigned NumInsertRegs = NumElts - 1;
2467
2468 // If our elements fit into exactly 128 bits, then we can copy from the source
2469 // directly. Otherwise, we need to do a bit of setup with some subregister
2470 // inserts.
2471 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2472 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2473 } else {
2474 // No. We have to perform subregister inserts. For each insert, create an
2475 // implicit def and a subregister insert, and save the register we create.
2476 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2477 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2478 MachineInstr &ImpDefMI =
2479 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2480 ImpDefReg);
2481
2482 // Now, create the subregister insert from SrcReg.
2483 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2484 MachineInstr &InsMI =
2485 *BuildMI(MBB, I, I.getDebugLoc(),
2486 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2487 .addUse(ImpDefReg)
2488 .addUse(SrcReg)
2489 .addImm(AArch64::dsub);
2490
2491 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2492 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2493
2494 // Save the register so that we can copy from it after.
2495 InsertRegs.push_back(InsertReg);
2496 }
2497 }
2498
2499 // Now that we've created any necessary subregister inserts, we can
2500 // create the copies.
2501 //
2502 // Perform the first copy separately as a subregister copy.
2503 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002504 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2505 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002506 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002507
2508 // Now, perform the remaining copies as vector lane copies.
2509 unsigned LaneIdx = 1;
2510 for (unsigned InsReg : InsertRegs) {
2511 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2512 MachineInstr &CopyInst =
2513 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2514 .addUse(InsReg)
2515 .addImm(LaneIdx);
2516 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2517 ++LaneIdx;
2518 }
2519
2520 // Separately constrain the first copy's destination. Because of the
2521 // limitation in constrainOperandRegClass, we can't guarantee that this will
2522 // actually be constrained. So, do it ourselves using the second operand.
2523 const TargetRegisterClass *RC =
2524 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2525 if (!RC) {
2526 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2527 return false;
2528 }
2529
2530 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2531 I.eraseFromParent();
2532 return true;
2533}
2534
Amara Emerson2ff22982019-03-14 22:48:15 +00002535bool AArch64InstructionSelector::selectConcatVectors(
2536 MachineInstr &I, MachineRegisterInfo &MRI) const {
2537 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2538 "Unexpected opcode");
2539 unsigned Dst = I.getOperand(0).getReg();
2540 unsigned Op1 = I.getOperand(1).getReg();
2541 unsigned Op2 = I.getOperand(2).getReg();
2542 MachineIRBuilder MIRBuilder(I);
2543 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2544 if (!ConcatMI)
2545 return false;
2546 I.eraseFromParent();
2547 return true;
2548}
2549
Amara Emerson1abe05c2019-02-21 20:20:16 +00002550void AArch64InstructionSelector::collectShuffleMaskIndices(
2551 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00002552 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00002553 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2554 assert(
2555 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2556 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2557 // Find the constant indices.
2558 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2559 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2560 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2561 // Look through copies.
2562 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2563 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2564 assert(ScalarDef && "Could not find def of copy operand");
2565 }
Amara Emerson2806fd02019-04-12 21:31:21 +00002566 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2567      // This must be an undef if it is not a constant.
2568 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2569 Idxs.push_back(None);
2570 } else {
2571 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2572 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002573 }
2574}
2575
2576unsigned
2577AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2578 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00002579 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00002580 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2581 if (Align == 0)
2582 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2583
2584 MachineConstantPool *MCP = MF.getConstantPool();
2585 return MCP->getConstantPoolIndex(CPVal, Align);
2586}
2587
2588MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2589 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2590 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2591
2592 auto Adrp =
2593 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2594 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
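  // This is the standard ADRP + load-from-constant-pool sequence: ADRP
  // materializes the page address of the pool entry and the load built below
  // addresses its MO_PAGEOFF low-12-bit offset.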
Amara Emerson8acb0d92019-03-04 19:16:00 +00002595
2596 MachineInstr *LoadMI = nullptr;
2597 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2598 case 16:
2599 LoadMI =
2600 &*MIRBuilder
2601 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2602 .addConstantPoolIndex(CPIdx, 0,
2603 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2604 break;
2605 case 8:
2606 LoadMI = &*MIRBuilder
2607 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2608 .addConstantPoolIndex(
2609 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2610 break;
2611 default:
2612 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2613 << *CPVal->getType());
2614 return nullptr;
2615 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002616 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002617 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2618 return LoadMI;
2619}
2620
2621/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2622/// size and RB.
2623static std::pair<unsigned, unsigned>
2624getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2625 unsigned Opc, SubregIdx;
2626 if (RB.getID() == AArch64::GPRRegBankID) {
2627 if (EltSize == 32) {
2628 Opc = AArch64::INSvi32gpr;
2629 SubregIdx = AArch64::ssub;
2630 } else if (EltSize == 64) {
2631 Opc = AArch64::INSvi64gpr;
2632 SubregIdx = AArch64::dsub;
2633 } else {
2634 llvm_unreachable("invalid elt size!");
2635 }
2636 } else {
2637 if (EltSize == 8) {
2638 Opc = AArch64::INSvi8lane;
2639 SubregIdx = AArch64::bsub;
2640 } else if (EltSize == 16) {
2641 Opc = AArch64::INSvi16lane;
2642 SubregIdx = AArch64::hsub;
2643 } else if (EltSize == 32) {
2644 Opc = AArch64::INSvi32lane;
2645 SubregIdx = AArch64::ssub;
2646 } else if (EltSize == 64) {
2647 Opc = AArch64::INSvi64lane;
2648 SubregIdx = AArch64::dsub;
2649 } else {
2650 llvm_unreachable("invalid elt size!");
2651 }
2652 }
2653 return std::make_pair(Opc, SubregIdx);
2654}
2655
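/// Concatenate two 64-bit vectors \p Op1 and \p Op2 into a 128-bit result,
/// creating a new destination register if \p Dst is None. Returns the final
/// insert instruction, or nullptr if the operands can't be concatenated.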
2656MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002657 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2658 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002659 // We implement a vector concat by:
2660  // 1. Using scalar_to_vector to insert the lower vector into the larger dest
2661  // 2. Inserting the upper vector into the destination's upper element
2662 // TODO: some of this code is common with G_BUILD_VECTOR handling.
2663 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2664
2665 const LLT Op1Ty = MRI.getType(Op1);
2666 const LLT Op2Ty = MRI.getType(Op2);
2667
2668 if (Op1Ty != Op2Ty) {
2669 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2670 return nullptr;
2671 }
2672 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2673
2674 if (Op1Ty.getSizeInBits() >= 128) {
2675 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2676 return nullptr;
2677 }
2678
2679 // At the moment we just support 64 bit vector concats.
2680 if (Op1Ty.getSizeInBits() != 64) {
2681    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2682 return nullptr;
2683 }
2684
2685 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2686 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2687 const TargetRegisterClass *DstRC =
2688 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2689
2690 MachineInstr *WidenedOp1 =
2691 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2692 MachineInstr *WidenedOp2 =
2693 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2694 if (!WidenedOp1 || !WidenedOp2) {
2695    LLVM_DEBUG(dbgs() << "Could not emit a vector from a scalar value");
2696 return nullptr;
2697 }
2698
2699 // Now do the insert of the upper element.
2700 unsigned InsertOpc, InsSubRegIdx;
2701 std::tie(InsertOpc, InsSubRegIdx) =
2702 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2703
Amara Emerson2ff22982019-03-14 22:48:15 +00002704 if (!Dst)
2705 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002706 auto InsElt =
2707 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002708 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002709 .addImm(1) /* Lane index */
2710 .addUse(WidenedOp2->getOperand(0).getReg())
2711 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002712 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2713 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002714}
2715
Amara Emerson761ca2e2019-03-19 21:43:05 +00002716bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2717 // Try to match a vector splat operation into a dup instruction.
2718 // We're looking for this pattern:
2719 // %scalar:gpr(s64) = COPY $x0
2720 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2721 // %cst0:gpr(s32) = G_CONSTANT i32 0
2722 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2723 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2724 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2725 // %zerovec(<2 x s32>)
2726 //
2727 // ...into:
2728 // %splat = DUP %scalar
2729 // We use the regbank of the scalar to determine which kind of dup to use.
2730 MachineIRBuilder MIB(I);
2731 MachineRegisterInfo &MRI = *MIB.getMRI();
2732 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2733 using namespace TargetOpcode;
2734 using namespace MIPatternMatch;
2735
2736 // Begin matching the insert.
2737 auto *InsMI =
2738 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2739 if (!InsMI)
2740 return false;
2741 // Match the undef vector operand.
2742 auto *UndefMI =
2743 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2744 if (!UndefMI)
2745 return false;
2746 // Match the scalar being splatted.
2747 unsigned ScalarReg = InsMI->getOperand(2).getReg();
2748 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2749 // Match the index constant 0.
2750 int64_t Index = 0;
2751 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2752 return false;
2753
2754 // The shuffle's second operand doesn't matter if the mask is all zero.
2755 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2756 if (!ZeroVec)
2757 return false;
2758 int64_t Zero = 0;
2759 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2760 return false;
2761 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2762 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2763 return false; // This wasn't an all zeros vector.
2764 }
2765
2766 // We're done, now find out what kind of splat we need.
2767 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2768 LLT EltTy = VecTy.getElementType();
2769 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2770    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern: unsupported vector/element size");
2771 return false;
2772 }
2773 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
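  // Indexed as [IsFP][Is64BitElt]: GPR-source DUPs in the first row,
  // lane (FPR-source) DUPs in the second.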
2774 static const unsigned OpcTable[2][2] = {
2775 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2776 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2777 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2778
2779 // For FP splats, we need to widen the scalar reg via undef too.
2780 if (IsFP) {
2781 MachineInstr *Widen = emitScalarToVector(
2782 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2783 if (!Widen)
2784 return false;
2785 ScalarReg = Widen->getOperand(0).getReg();
2786 }
2787 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
2788 if (IsFP)
2789 Dup.addImm(0);
2790 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
2791 I.eraseFromParent();
2792 return true;
2793}
2794
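/// Try to select a G_SHUFFLE_VECTOR with a cheaper pattern than the generic
/// TBL lowering. Currently this only handles the splat-to-DUP case, and only
/// when optimizations are enabled.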
2795bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
2796 if (TM.getOptLevel() == CodeGenOpt::None)
2797 return false;
2798 if (tryOptVectorDup(I))
2799 return true;
2800 return false;
2801}
2802
Amara Emerson1abe05c2019-02-21 20:20:16 +00002803bool AArch64InstructionSelector::selectShuffleVector(
2804 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00002805 if (tryOptVectorShuffle(I))
2806 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002807 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2808 unsigned Src1Reg = I.getOperand(1).getReg();
2809 const LLT Src1Ty = MRI.getType(Src1Reg);
2810 unsigned Src2Reg = I.getOperand(2).getReg();
2811 const LLT Src2Ty = MRI.getType(Src2Reg);
2812
2813 MachineBasicBlock &MBB = *I.getParent();
2814 MachineFunction &MF = *MBB.getParent();
2815 LLVMContext &Ctx = MF.getFunction().getContext();
2816
2817 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2818  // operand; it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00002819 // find the mask indices. If the mask element is undef, then
2820 // collectShuffleMaskIndices() will add a None entry for that index into
2821 // the list.
2822 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002823 collectShuffleMaskIndices(I, MRI, Mask);
2824 assert(!Mask.empty() && "Expected to find mask indices");
2825
2826 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2827 // it's originated from a <1 x T> type. Those should have been lowered into
2828 // G_BUILD_VECTOR earlier.
2829 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2830 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2831 return false;
2832 }
2833
2834 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2835
2836 SmallVector<Constant *, 64> CstIdxs;
Amara Emerson2806fd02019-04-12 21:31:21 +00002837 for (auto &MaybeVal : Mask) {
2838    // For now, we'll just assume any undef indexes are 0. This should be
2839    // optimized in the future, e.g. to select DUP etc.
2840 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002841 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2842 unsigned Offset = Byte + Val * BytesPerElt;
2843 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2844 }
2845 }
2846
Amara Emerson8acb0d92019-03-04 19:16:00 +00002847 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002848
2849 // Use a constant pool to load the index vector for TBL.
2850 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002851 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2852 if (!IndexLoad) {
2853 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
2854 return false;
2855 }
2856
Amara Emerson8acb0d92019-03-04 19:16:00 +00002857 if (DstTy.getSizeInBits() != 128) {
2858 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2859 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00002860 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002861 if (!Concat) {
2862 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
2863 return false;
2864 }
2865
2866    // The constant pool load will be 64 bits, so convert it to an FPR128 reg.
2867 IndexLoad =
2868 emitScalarToVector(64, &AArch64::FPR128RegClass,
2869 IndexLoad->getOperand(0).getReg(), MIRBuilder);
2870
2871 auto TBL1 = MIRBuilder.buildInstr(
2872 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2873 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2874 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2875
Amara Emerson3739a202019-03-15 21:59:50 +00002876 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00002877 MIRBuilder
2878 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2879 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002880 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2881 I.eraseFromParent();
2882 return true;
2883 }
2884
Amara Emerson1abe05c2019-02-21 20:20:16 +00002885 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2886 // Q registers for regalloc.
2887 auto RegSeq = MIRBuilder
2888 .buildInstr(TargetOpcode::REG_SEQUENCE,
2889 {&AArch64::QQRegClass}, {Src1Reg})
2890 .addImm(AArch64::qsub0)
2891 .addUse(Src2Reg)
2892 .addImm(AArch64::qsub1);
2893
2894 auto TBL2 =
2895 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2896 {RegSeq, IndexLoad->getOperand(0).getReg()});
2897 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2898 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2899 I.eraseFromParent();
2900 return true;
2901}
2902
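/// Emit an insert of \p EltReg into lane \p LaneIdx of \p SrcReg, creating a
/// fresh FPR128 destination if \p DstReg is None. FPR-bank elements are first
/// widened with an implicit def so the INS (element) form can be used;
/// GPR-bank elements use the INS (general register) form directly.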
Jessica Paquette16d67a32019-03-13 23:22:23 +00002903MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2904 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2905 unsigned LaneIdx, const RegisterBank &RB,
2906 MachineIRBuilder &MIRBuilder) const {
2907 MachineInstr *InsElt = nullptr;
2908 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2909 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2910
2911 // Create a register to define with the insert if one wasn't passed in.
2912 if (!DstReg)
2913 DstReg = MRI.createVirtualRegister(DstRC);
2914
2915 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2916 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2917
2918 if (RB.getID() == AArch64::FPRRegBankID) {
2919 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2920 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2921 .addImm(LaneIdx)
2922 .addUse(InsSub->getOperand(0).getReg())
2923 .addImm(0);
2924 } else {
2925 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2926 .addImm(LaneIdx)
2927 .addUse(EltReg);
2928 }
2929
2930 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2931 return InsElt;
2932}
2933
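/// Select a G_INSERT_VECTOR_ELT. The lane index must be defined by a
/// G_CONSTANT; vectors narrower than 128 bits are widened for the insert and
/// then demoted back to the original size via a subregister copy.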
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002934bool AArch64InstructionSelector::selectInsertElt(
2935 MachineInstr &I, MachineRegisterInfo &MRI) const {
2936 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
2937
2938 // Get information on the destination.
2939 unsigned DstReg = I.getOperand(0).getReg();
2940 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002941 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002942
2943 // Get information on the element we want to insert into the destination.
2944 unsigned EltReg = I.getOperand(2).getReg();
2945 const LLT EltTy = MRI.getType(EltReg);
2946 unsigned EltSize = EltTy.getSizeInBits();
2947 if (EltSize < 16 || EltSize > 64)
2948 return false; // Don't support all element types yet.
2949
2950 // Find the definition of the index. Bail out if it's not defined by a
2951 // G_CONSTANT.
2952 unsigned IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00002953 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
2954 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002955 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002956 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002957
2958 // Perform the lane insert.
2959 unsigned SrcReg = I.getOperand(1).getReg();
2960 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
2961 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002962
2963 if (VecSize < 128) {
2964 // If the vector we're inserting into is smaller than 128 bits, widen it
2965 // to 128 to do the insert.
2966 MachineInstr *ScalarToVec = emitScalarToVector(
2967 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
2968 if (!ScalarToVec)
2969 return false;
2970 SrcReg = ScalarToVec->getOperand(0).getReg();
2971 }
2972
2973 // Create an insert into a new FPR128 register.
2974 // Note that if our vector is already 128 bits, we end up emitting an extra
2975 // register.
2976 MachineInstr *InsMI =
2977 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
2978
2979 if (VecSize < 128) {
2980 // If we had to widen to perform the insert, then we have to demote back to
2981 // the original size to get the result we want.
2982 unsigned DemoteVec = InsMI->getOperand(0).getReg();
2983 const TargetRegisterClass *RC =
2984 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
2985 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2986 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2987 return false;
2988 }
2989 unsigned SubReg = 0;
2990 if (!getSubRegForClass(RC, TRI, SubReg))
2991 return false;
2992 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2993 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
2994                        << ")\n");
2995 return false;
2996 }
2997 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2998 .addReg(DemoteVec, 0, SubReg);
2999 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3000 } else {
3001 // No widening needed.
3002 InsMI->getOperand(0).setReg(DstReg);
3003 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3004 }
3005
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003006 I.eraseFromParent();
3007 return true;
3008}
3009
Amara Emerson5ec14602018-12-10 18:44:58 +00003010bool AArch64InstructionSelector::selectBuildVector(
3011 MachineInstr &I, MachineRegisterInfo &MRI) const {
3012 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3013 // Until we port more of the optimized selections, for now just use a vector
3014 // insert sequence.
3015 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3016 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3017 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003018 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003019 return false; // Don't support all element types yet.
3020 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003021 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003022
3023 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003024 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003025 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3026 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003027 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003028 return false;
3029
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003030 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003031 unsigned DstSize = DstTy.getSizeInBits();
3032
3033 // Keep track of the last MI we inserted. Later on, we might be able to save
3034 // a copy using it.
3035 MachineInstr *PrevMI = nullptr;
3036 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003037 // Note that if we don't do a subregister copy, we can end up making an
3038 // extra register.
3039 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3040 MIRBuilder);
3041 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003042 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003043
3044 // If DstTy's size in bits is less than 128, then emit a subregister copy
3045 // from DstVec to the last register we've defined.
3046 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003047 // Force this to be FPR using the destination vector.
3048 const TargetRegisterClass *RC =
3049 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003050 if (!RC)
3051 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003052 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3053 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3054 return false;
3055 }
3056
3057 unsigned SubReg = 0;
3058 if (!getSubRegForClass(RC, TRI, SubReg))
3059 return false;
3060 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3061 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3062                        << ")\n");
3063 return false;
3064 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003065
3066 unsigned Reg = MRI.createVirtualRegister(RC);
3067 unsigned DstReg = I.getOperand(0).getReg();
3068
Amara Emerson86271782019-03-18 19:20:10 +00003069 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3070 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003071 MachineOperand &RegOp = I.getOperand(1);
3072 RegOp.setReg(Reg);
3073 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3074 } else {
3075 // We don't need a subregister copy. Save a copy by re-using the
3076 // destination register on the final insert.
3077 assert(PrevMI && "PrevMI was null?");
3078 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3079 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3080 }
3081
Amara Emerson5ec14602018-12-10 18:44:58 +00003082 I.eraseFromParent();
3083 return true;
3084}
3085
Jessica Paquette22c62152019-04-02 19:57:26 +00003086/// Helper function to return the correct opcode for an llvm.aarch64.stlxr
 3087/// intrinsic, given the number of bytes being stored.
3088static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3089 switch (NumBytesToStore) {
3090 // TODO: 1, 2, and 4 byte stores.
3091 case 8:
3092 return AArch64::STLXRX;
3093 default:
3094 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3095 << NumBytesToStore << ")\n");
3096 break;
3097 }
3098 return 0;
3099}
3100
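/// Select a G_INTRINSIC_W_SIDE_EFFECTS instruction. Currently handles
/// llvm.trap and llvm.aarch64.stlxr (64-bit stores only).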
3101bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3102 MachineInstr &I, MachineRegisterInfo &MRI) const {
3103 // Find the intrinsic ID.
3104 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3105 return Op.isIntrinsicID();
3106 });
3107 if (IntrinOp == I.operands_end())
3108 return false;
3109 unsigned IntrinID = IntrinOp->getIntrinsicID();
3110 MachineIRBuilder MIRBuilder(I);
3111
3112 // Select the instruction.
3113 switch (IntrinID) {
3114 default:
3115 return false;
3116 case Intrinsic::trap:
3117 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3118 break;
3119 case Intrinsic::aarch64_stlxr:
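    // Operands here: status result, intrinsic ID, value to store, pointer to
    // store to.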
3120 unsigned StatReg = I.getOperand(0).getReg();
3121 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3122 "Status register must be 32 bits!");
3123 unsigned SrcReg = I.getOperand(2).getReg();
3124
3125 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3126 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3127 return false;
3128 }
3129
3130 unsigned PtrReg = I.getOperand(3).getReg();
3131 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3132
3133 // Expect only one memory operand.
3134 if (!I.hasOneMemOperand())
3135 return false;
3136
3137 const MachineMemOperand *MemOp = *I.memoperands_begin();
3138 unsigned NumBytesToStore = MemOp->getSize();
3139 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3140 if (!Opc)
3141 return false;
3142
3143 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3144 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3145 }
3146
3147 I.eraseFromParent();
3148 return true;
3149}
3150
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003151/// SelectArithImmed - Select an immediate value that can be represented as
3152/// a 12-bit value shifted left by either 0 or 12. If so, return true with
3153/// Val set to the 12-bit value and Shift set to the shifter operand.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003154InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00003155AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003156 MachineInstr &MI = *Root.getParent();
3157 MachineBasicBlock &MBB = *MI.getParent();
3158 MachineFunction &MF = *MBB.getParent();
3159 MachineRegisterInfo &MRI = MF.getRegInfo();
3160
3161 // This function is called from the addsub_shifted_imm ComplexPattern,
3162  // which lists [imm] as the list of opcodes it's interested in; however,
3163 // we still need to check whether the operand is actually an immediate
3164 // here because the ComplexPattern opcode list is only used in
3165 // root-level opcode matching.
3166 uint64_t Immed;
3167 if (Root.isImm())
3168 Immed = Root.getImm();
3169 else if (Root.isCImm())
3170 Immed = Root.getCImm()->getZExtValue();
3171 else if (Root.isReg()) {
3172 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3173 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003174 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003175 MachineOperand &Op1 = Def->getOperand(1);
3176 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003177 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003178 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003179 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003180 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003181
3182 unsigned ShiftAmt;
3183
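  // The immediate is legal if it is a plain 12-bit value, or a 12-bit value
  // shifted left by 12; e.g. 0x123000 encodes as imm=0x123 with shift=12.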
3184 if (Immed >> 12 == 0) {
3185 ShiftAmt = 0;
3186 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3187 ShiftAmt = 12;
3188 Immed = Immed >> 12;
3189 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003190 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003191
3192 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003193 return {{
3194 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3195 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3196 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003197}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003198
Daniel Sandersea8711b2017-10-16 03:36:29 +00003199/// Select a "register plus unscaled signed 9-bit immediate" address. This
3200/// should only match when there is an offset that is not valid for a scaled
3201/// immediate addressing mode. The "Size" argument is the size in bytes of the
3202/// memory reference, which is needed here to know what is valid for a scaled
3203/// immediate.
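/// For example, a 4-byte access at offset +3 can't use the scaled
/// unsigned-immediate form, but does fit the signed 9-bit unscaled range.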
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003204InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003205AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3206 unsigned Size) const {
3207 MachineRegisterInfo &MRI =
3208 Root.getParent()->getParent()->getParent()->getRegInfo();
3209
3210 if (!Root.isReg())
3211 return None;
3212
3213 if (!isBaseWithConstantOffset(Root, MRI))
3214 return None;
3215
3216 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3217 if (!RootDef)
3218 return None;
3219
3220 MachineOperand &OffImm = RootDef->getOperand(2);
3221 if (!OffImm.isReg())
3222 return None;
3223 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3224 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3225 return None;
3226 int64_t RHSC;
3227 MachineOperand &RHSOp1 = RHS->getOperand(1);
3228 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3229 return None;
3230 RHSC = RHSOp1.getCImm()->getSExtValue();
3231
3232 // If the offset is valid as a scaled immediate, don't match here.
3233 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3234 return None;
3235 if (RHSC >= -256 && RHSC < 256) {
3236 MachineOperand &Base = RootDef->getOperand(1);
3237 return {{
3238 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3239 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3240 }};
3241 }
3242 return None;
3243}
3244
3245/// Select a "register plus scaled unsigned 12-bit immediate" address. The
3246/// "Size" argument is the size in bytes of the memory reference, which
3247/// determines the scale.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003248InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003249AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3250 unsigned Size) const {
3251 MachineRegisterInfo &MRI =
3252 Root.getParent()->getParent()->getParent()->getRegInfo();
3253
3254 if (!Root.isReg())
3255 return None;
3256
3257 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3258 if (!RootDef)
3259 return None;
3260
3261 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3262 return {{
3263 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3264 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3265 }};
3266 }
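  // Match (add base, G_CONSTANT) and fold the constant into the scaled
  // unsigned 12-bit immediate field when it's a non-negative multiple of the
  // access size that fits in 12 bits after scaling.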
3267
3268 if (isBaseWithConstantOffset(Root, MRI)) {
3269 MachineOperand &LHS = RootDef->getOperand(1);
3270 MachineOperand &RHS = RootDef->getOperand(2);
3271 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3272 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3273 if (LHSDef && RHSDef) {
3274 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3275 unsigned Scale = Log2_32(Size);
3276 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3277 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00003278 return {{
3279 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3280 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3281 }};
3282
Daniel Sandersea8711b2017-10-16 03:36:29 +00003283 return {{
3284 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3285 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3286 }};
3287 }
3288 }
3289 }
3290
3291 // Before falling back to our general case, check if the unscaled
3292 // instructions can handle this. If so, that's preferable.
3293 if (selectAddrModeUnscaled(Root, Size).hasValue())
3294 return None;
3295
3296 return {{
3297 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3298 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3299 }};
3300}
3301
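/// Add the value of \p MI (a G_CONSTANT) to \p MIB as an immediate operand.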
Volkan Kelesf7f25682018-01-16 18:44:05 +00003302void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3303 const MachineInstr &MI) const {
3304 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3305 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3306 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3307 assert(CstVal && "Expected constant value");
3308 MIB.addImm(CstVal.getValue());
3309}
3310
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003311namespace llvm {
3312InstructionSelector *
3313createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3314 AArch64Subtarget &Subtarget,
3315 AArch64RegisterBankInfo &RBI) {
3316 return new AArch64InstructionSelector(TM, Subtarget, RBI);
3317}
3318}