//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

151 void materializeLargeCMVal(MachineInstr &I, const Value *V,
152 unsigned char OpFlags) const;
153
Amara Emerson761ca2e2019-03-19 21:43:05 +0000154 // Optimization methods.
155
156 // Helper function to check if a reg def is an MI with a given opcode and
157 // returns it if so.
158 MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
159 MachineIRBuilder &MIB) const {
160 auto *Def = MIB.getMRI()->getVRegDef(Reg);
161 if (!Def || Def->getOpcode() != Opc)
162 return nullptr;
163 return Def;
164 }
165
166 bool tryOptVectorShuffle(MachineInstr &I) const;
167 bool tryOptVectorDup(MachineInstr &MI) const;
168
  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the AArch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}
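// For example (illustrative, per the switches above): selectBinaryOp(G_SHL,
// GPRRegBankID, 32) yields AArch64::LSLVWr, while selectBinaryOp(G_FADD,
// FPRRegBankID, 64) yields AArch64::FADDDrr; unhandled combinations simply
// return the generic opcode unchanged so the caller can bail out.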

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
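// For example (illustrative, per the switches above): a 64-bit G_LOAD whose
// value lives on the GPR bank maps to AArch64::LDRXui, whereas a 32-bit
// G_STORE on the FPR bank maps to AArch64::STRSui.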

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

489/// Helper function for selectCopy. Inserts a subregister copy from
490/// \p *From to \p *To, linking it up to \p I.
491///
492/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
493///
494/// CopyReg (From class) = COPY SrcReg
495/// SubRegCopy (To class) = COPY CopyReg:SubReg
496/// Dst = COPY SubRegCopy
Amara Emerson3739a202019-03-15 21:59:50 +0000497static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
Jessica Paquette245047d2019-01-24 22:00:41 +0000498 const RegisterBankInfo &RBI, unsigned SrcReg,
499 const TargetRegisterClass *From,
500 const TargetRegisterClass *To,
501 unsigned SubReg) {
Amara Emerson3739a202019-03-15 21:59:50 +0000502 MachineIRBuilder MIB(I);
503 auto Copy = MIB.buildCopy({From}, {SrcReg});
Amara Emerson86271782019-03-18 19:20:10 +0000504 auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
505 .addReg(Copy.getReg(0), 0, SubReg);
Amara Emersondb211892018-02-20 05:11:57 +0000506 MachineOperand &RegOp = I.getOperand(1);
Amara Emerson3739a202019-03-15 21:59:50 +0000507 RegOp.setReg(SubRegCopy.getReg(0));
Jessica Paquette245047d2019-01-24 22:00:41 +0000508
509 // It's possible that the destination register won't be constrained. Make
510 // sure that happens.
511 if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
512 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
513
Amara Emersondb211892018-02-20 05:11:57 +0000514 return true;
515}
516
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}
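// For example (illustrative, per the nested switches above): a G_SITOFP from
// an s64 integer to an s32 float selects AArch64::SCVTFUXSri, i.e. the variant
// reading a 64-bit (X) GPR and writing a 32-bit (S) FPR.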

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}
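// Note (illustrative): some FP predicates have no single AArch64 condition
// code, which is why the second out-parameter exists. FCMP_ONE, for instance,
// is mapped above to MI with GT as the secondary code, and callers are
// expected to treat either condition holding as a match.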

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}
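// For example (illustrative): a G_BRCOND whose condition comes from
// "G_ICMP intpred(eq), %x, 0" is folded by selectCompareBranch above into a
// single CBZ on %x, avoiding a separate compare followed by a conditional
// branch.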

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction; instead, the
  // shift-left-by-register instruction takes a signed shift amount, where
  // negative values specify a right shift.
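  // Illustrative expansion (assuming a 4 x s32 G_ASHR):
  //   %neg:fpr128 = NEGv4i32 %shift_amounts
  //   %dst        = SSHLv4i32 %src, %neg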

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}
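// Illustrative result of materializeLargeCMVal above: the 64-bit address is
// assembled 16 bits at a time via the :abs_gN: relocation groups, roughly
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g3:sym
// (register choice and exact relocation spelling shown here are illustrative
// only; the actual operands are built from the original G_GLOBAL_VALUE).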

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    I.setDesc(TII.get(MovOpc));

    if (isFP) {
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    (void)DstTy;
    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

    if (SrcSize < 64) {
      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
             "unexpected G_EXTRACT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
    MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    unsigned DstSize = DstTy.getSizeInBits();
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

    if (DstSize < 64) {
      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
             "unexpected G_INSERT types");
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
        .addDef(SrcReg)
        .addImm(0)
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001242 case TargetOpcode::G_FRAME_INDEX: {
1243 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001244 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001245 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1246 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001247 return false;
1248 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001249 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001250
1251 // MOs for a #0 shifted immediate.
1252 I.addOperand(MachineOperand::CreateImm(0));
1253 I.addOperand(MachineOperand::CreateImm(0));
1254
1255 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1256 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001257
1258 case TargetOpcode::G_GLOBAL_VALUE: {
1259 auto GV = I.getOperand(1).getGlobal();
1260 if (GV->isThreadLocal()) {
1261 // FIXME: we don't support TLS yet.
1262 return false;
1263 }
1264 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001265 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001266 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001267 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001268 } else if (TM.getCodeModel() == CodeModel::Large) {
1269 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001270 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001271 I.eraseFromParent();
1272 return true;
David Green9dd1d452018-08-22 11:31:39 +00001273 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1274 I.setDesc(TII.get(AArch64::ADR));
1275 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001276 } else {
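      // Default case (e.g. the small code model): MOVaddr is a pseudo that is
      // later expanded to an ADRP/ADD pair, using the PAGE and PAGEOFF|NC
      // target flags set up below.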
Tim Northoverbdf16242016-10-10 21:50:00 +00001277 I.setDesc(TII.get(AArch64::MOVaddr));
1278 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1279 MachineInstrBuilder MIB(MF, I);
1280 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1281 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1282 }
1283 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1284 }
1285
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001286 case TargetOpcode::G_LOAD:
1287 case TargetOpcode::G_STORE: {
Tim Northover0f140c72016-09-09 11:46:34 +00001288 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001289
Tim Northover5ae83502016-09-15 09:20:34 +00001290 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001291 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1292 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001293 return false;
1294 }
1295
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001296 auto &MemOp = **I.memoperands_begin();
1297 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001298 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001299 return false;
1300 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001301 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001302
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001303 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001304#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001305 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001306 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001307 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1308 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001309 assert(MRI.getType(PtrReg).isPointer() &&
1310 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001311#endif
1312
1313 const unsigned ValReg = I.getOperand(0).getReg();
1314 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1315
1316 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001317 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001318 if (NewOpc == I.getOpcode())
1319 return false;
1320
1321 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001322
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001323 uint64_t Offset = 0;
1324 auto *PtrMI = MRI.getVRegDef(PtrReg);
1325
1326 // Try to fold a GEP into our unsigned immediate addressing mode.
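    // Illustrative example: the *ui load/store forms take a 12-bit immediate
    // scaled by the access size, so for an 8-byte access (Scale == 3) any
    // non-negative, 8-byte-aligned byte offset below 0x1000 << 3 = 32768 can
    // be folded, and is stored pre-scaled as Imm / Size.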
1327 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1328 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1329 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001330 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001331 const unsigned Scale = Log2_32(Size);
1332 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1333 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1334 I.getOperand(1).setReg(Ptr2Reg);
1335 PtrMI = MRI.getVRegDef(Ptr2Reg);
1336 Offset = Imm / Size;
1337 }
1338 }
1339 }
1340
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001341 // If we haven't folded anything into our addressing mode yet, try to fold
1342 // a frame index into the base+offset.
1343 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1344 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1345
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001346 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001347
1348 // If we're storing a 0, use WZR/XZR.
1349 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1350 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1351 if (I.getOpcode() == AArch64::STRWui)
1352 I.getOperand(0).setReg(AArch64::WZR);
1353 else if (I.getOpcode() == AArch64::STRXui)
1354 I.getOperand(0).setReg(AArch64::XZR);
1355 }
1356 }
1357
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001358 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1359 }
1360
Tim Northover9dd78f82017-02-08 21:22:25 +00001361 case TargetOpcode::G_SMULH:
1362 case TargetOpcode::G_UMULH: {
1363 // Reject the various things we don't support yet.
1364 if (unsupportedBinOp(I, RBI, MRI, TRI))
1365 return false;
1366
1367 const unsigned DefReg = I.getOperand(0).getReg();
1368 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1369
1370 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001371 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001372 return false;
1373 }
1374
1375 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001376 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1377 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001378 return false;
1379 }
1380
1381 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1382 : AArch64::UMULHrr;
1383 I.setDesc(TII.get(NewOpc));
1384
1385 // Now that we selected an opcode, we need to constrain the register
1386 // operands to use appropriate classes.
1387 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1388 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001389 case TargetOpcode::G_FADD:
1390 case TargetOpcode::G_FSUB:
1391 case TargetOpcode::G_FMUL:
1392 case TargetOpcode::G_FDIV:
1393
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001394 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001395 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1396 return selectVectorASHR(I, MRI);
1397 LLVM_FALLTHROUGH;
1398 case TargetOpcode::G_SHL:
1399 if (Opcode == TargetOpcode::G_SHL &&
1400 MRI.getType(I.getOperand(0).getReg()).isVector())
1401 return selectVectorSHL(I, MRI);
1402 LLVM_FALLTHROUGH;
1403 case TargetOpcode::G_OR:
1404 case TargetOpcode::G_LSHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001405 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001406 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001407 if (unsupportedBinOp(I, RBI, MRI, TRI))
1408 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001409
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001410 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001411
1412 const unsigned DefReg = I.getOperand(0).getReg();
1413 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1414
1415 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1416 if (NewOpc == I.getOpcode())
1417 return false;
1418
1419 I.setDesc(TII.get(NewOpc));
1420 // FIXME: Should the type be always reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001421
1422 // Now that we selected an opcode, we need to constrain the register
1423 // operands to use appropriate classes.
1424 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1425 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001426
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001427 case TargetOpcode::G_UADDO: {
1428 // TODO: Support other types.
1429 unsigned OpSize = Ty.getSizeInBits();
1430 if (OpSize != 32 && OpSize != 64) {
1431 LLVM_DEBUG(
1432 dbgs()
1433 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1434 return false;
1435 }
1436
1437 // TODO: Support vectors.
1438 if (Ty.isVector()) {
1439 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1440 return false;
1441 }
1442
1443 // Emit an add that also sets the condition flags (ADDS).
1444 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1445 MachineIRBuilder MIRBuilder(I);
1446 auto AddsMI = MIRBuilder.buildInstr(
1447 AddsOpc, {I.getOperand(0).getReg()},
1448 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1449 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1450
1451 // Now, put the overflow result in the carry-out register given by the second
1452 // def of the G_UADDO. CSINC increments the result when the predicate is false,
1453 // so to get the increment when it's true, we need to use the inverse. In
1454 // this case, we want to increment when carry is set.
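    // (CSINC Wd, WZR, WZR, cond yields 0 when cond holds and 1 otherwise, so
    // using the inverse of HS below is the same as the CSET Wd, hs idiom:
    // the result is 1 exactly when the carry flag is set.)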
1455 auto CsetMI = MIRBuilder
1456 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1457 {AArch64::WZR, AArch64::WZR})
1458 .addImm(getInvertedCondCode(AArch64CC::HS));
1459 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1460 I.eraseFromParent();
1461 return true;
1462 }
1463
Tim Northover398c5f52017-02-14 20:56:29 +00001464 case TargetOpcode::G_PTR_MASK: {
1465 uint64_t Align = I.getOperand(2).getImm();
1466 if (Align >= 64 || Align == 0)
1467 return false;
1468
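    // The G_PTR_MASK immediate is the number of low bits to clear (log2 of
    // the alignment); e.g. an immediate of 4 produces the mask ~0xFULL,
    // which encodeLogicalImmediate turns into an ANDXri bitmask immediate.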
1469 uint64_t Mask = ~((1ULL << Align) - 1);
1470 I.setDesc(TII.get(AArch64::ANDXri));
1471 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1472
1473 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1474 }
Tim Northover037af52c2016-10-31 18:31:09 +00001475 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001476 case TargetOpcode::G_TRUNC: {
1477 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1478 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1479
1480 const unsigned DstReg = I.getOperand(0).getReg();
1481 const unsigned SrcReg = I.getOperand(1).getReg();
1482
1483 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1484 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1485
1486 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001487 LLVM_DEBUG(
1488 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001489 return false;
1490 }
1491
1492 if (DstRB.getID() == AArch64::GPRRegBankID) {
1493 const TargetRegisterClass *DstRC =
1494 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1495 if (!DstRC)
1496 return false;
1497
1498 const TargetRegisterClass *SrcRC =
1499 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1500 if (!SrcRC)
1501 return false;
1502
1503 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1504 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001505 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001506 return false;
1507 }
1508
1509 if (DstRC == SrcRC) {
1510 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001511 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1512 SrcTy == LLT::scalar(64)) {
1513 llvm_unreachable("TableGen can import this case");
1514 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001515 } else if (DstRC == &AArch64::GPR32RegClass &&
1516 SrcRC == &AArch64::GPR64RegClass) {
1517 I.getOperand(1).setSubReg(AArch64::sub_32);
1518 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001519 LLVM_DEBUG(
1520 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001521 return false;
1522 }
1523
1524 I.setDesc(TII.get(TargetOpcode::COPY));
1525 return true;
1526 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1527 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1528 I.setDesc(TII.get(AArch64::XTNv4i16));
1529 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1530 return true;
1531 }
1532 }
1533
1534 return false;
1535 }
1536
Tim Northover3d38b3a2016-10-11 20:50:21 +00001537 case TargetOpcode::G_ANYEXT: {
1538 const unsigned DstReg = I.getOperand(0).getReg();
1539 const unsigned SrcReg = I.getOperand(1).getReg();
1540
Quentin Colombetcb629a82016-10-12 03:57:49 +00001541 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1542 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001543 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1544 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001545 return false;
1546 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001547
Quentin Colombetcb629a82016-10-12 03:57:49 +00001548 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1549 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001550 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1551 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001552 return false;
1553 }
1554
1555 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1556
1557 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001558 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001559 return false;
1560 }
1561
Quentin Colombetcb629a82016-10-12 03:57:49 +00001562 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001563 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1564 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001565 return false;
1566 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001567 // At this point G_ANYEXT is just like a plain COPY, but we need
1568 // to explicitly form the 64-bit value if any.
1569 if (DstSize > 32) {
1570 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1571 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1572 .addDef(ExtSrc)
1573 .addImm(0)
1574 .addUse(SrcReg)
1575 .addImm(AArch64::sub_32);
1576 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001577 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001578 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001579 }
1580
1581 case TargetOpcode::G_ZEXT:
1582 case TargetOpcode::G_SEXT: {
1583 unsigned Opcode = I.getOpcode();
1584 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1585 SrcTy = MRI.getType(I.getOperand(1).getReg());
1586 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1587 const unsigned DefReg = I.getOperand(0).getReg();
1588 const unsigned SrcReg = I.getOperand(1).getReg();
1589 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1590
1591 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001592 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1593 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001594 return false;
1595 }
1596
1597 MachineInstr *ExtI;
1598 if (DstTy == LLT::scalar(64)) {
1599 // FIXME: Can we avoid manually doing this?
1600 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001601 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1602 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001603 return false;
1604 }
1605
1606 const unsigned SrcXReg =
1607 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1608 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1609 .addDef(SrcXReg)
1610 .addImm(0)
1611 .addUse(SrcReg)
1612 .addImm(AArch64::sub_32);
1613
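      // Note: with immr = 0 and imms = SrcTy.getSizeInBits() - 1, SBFMXri /
      // UBFMXri act as SBFX/UBFX of the low source bits, i.e. they sign- or
      // zero-extend the original value into the 64-bit destination.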
1614 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1615 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1616 .addDef(DefReg)
1617 .addUse(SrcXReg)
1618 .addImm(0)
1619 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001620 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001621 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1622 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1623 .addDef(DefReg)
1624 .addUse(SrcReg)
1625 .addImm(0)
1626 .addImm(SrcTy.getSizeInBits() - 1);
1627 } else {
1628 return false;
1629 }
1630
1631 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1632
1633 I.eraseFromParent();
1634 return true;
1635 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001636
Tim Northover69271c62016-10-12 22:49:11 +00001637 case TargetOpcode::G_SITOFP:
1638 case TargetOpcode::G_UITOFP:
1639 case TargetOpcode::G_FPTOSI:
1640 case TargetOpcode::G_FPTOUI: {
1641 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1642 SrcTy = MRI.getType(I.getOperand(1).getReg());
1643 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1644 if (NewOpc == Opcode)
1645 return false;
1646
1647 I.setDesc(TII.get(NewOpc));
1648 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1649
1650 return true;
1651 }
1652
1653
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001654 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001655 // The importer is currently unable to import pointer types since they
1656 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001657 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001658
Daniel Sandersedd07842017-08-17 09:26:14 +00001659 case TargetOpcode::G_BITCAST:
1660 // Imported SelectionDAG rules can handle every bitcast except those that
1661 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00001662 // but we might not run an optimizer that deletes them. The other exception
1663 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1664 // of them.
1665 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00001666
Tim Northover9ac0eba2016-11-08 00:45:29 +00001667 case TargetOpcode::G_SELECT: {
1668 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001669 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1670 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001671 return false;
1672 }
1673
1674 const unsigned CondReg = I.getOperand(1).getReg();
1675 const unsigned TReg = I.getOperand(2).getReg();
1676 const unsigned FReg = I.getOperand(3).getReg();
1677
1678 unsigned CSelOpc = 0;
1679
1680 if (Ty == LLT::scalar(32)) {
1681 CSelOpc = AArch64::CSELWr;
Kristof Beylse9412b42017-01-19 13:32:14 +00001682 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
Tim Northover9ac0eba2016-11-08 00:45:29 +00001683 CSelOpc = AArch64::CSELXr;
1684 } else {
1685 return false;
1686 }
1687
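    // Test the low bit of the i1 condition (ANDS against #1 sets NZCV); the
    // CSEL below then uses NE, so TReg is chosen when the bit is set and
    // FReg otherwise.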
1688 MachineInstr &TstMI =
1689 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1690 .addDef(AArch64::WZR)
1691 .addUse(CondReg)
1692 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1693
1694 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1695 .addDef(I.getOperand(0).getReg())
1696 .addUse(TReg)
1697 .addUse(FReg)
1698 .addImm(AArch64CC::NE);
1699
1700 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1701 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1702
1703 I.eraseFromParent();
1704 return true;
1705 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001706 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00001707 if (Ty.isVector())
1708 return selectVectorICmp(I, MRI);
1709
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001710 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001711 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1712 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001713 return false;
1714 }
1715
1716 unsigned CmpOpc = 0;
1717 unsigned ZReg = 0;
1718
1719 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1720 if (CmpTy == LLT::scalar(32)) {
1721 CmpOpc = AArch64::SUBSWrr;
1722 ZReg = AArch64::WZR;
1723 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1724 CmpOpc = AArch64::SUBSXrr;
1725 ZReg = AArch64::XZR;
1726 } else {
1727 return false;
1728 }
1729
Kristof Beyls22524402017-01-05 10:16:08 +00001730 // CSINC increments the result by one when the condition code is false.
1731 // Therefore, we have to invert the predicate to get an increment by 1 when
1732 // the predicate is true.
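    // For example, for an eq compare this emits (roughly):
    //   SUBSWrr wzr, lhs, rhs
    //   CSINCWr dst, wzr, wzr, ne
    // which produces 1 when the operands are equal and 0 otherwise.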
1733 const AArch64CC::CondCode invCC =
1734 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1735 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
Tim Northover6c02ad52016-10-12 22:49:04 +00001736
1737 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1738 .addDef(ZReg)
1739 .addUse(I.getOperand(2).getReg())
1740 .addUse(I.getOperand(3).getReg());
1741
1742 MachineInstr &CSetMI =
1743 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1744 .addDef(I.getOperand(0).getReg())
1745 .addUse(AArch64::WZR)
1746 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001747 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001748
1749 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1750 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1751
1752 I.eraseFromParent();
1753 return true;
1754 }
1755
Tim Northover7dd378d2016-10-12 22:49:07 +00001756 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001757 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001758 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1759 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001760 return false;
1761 }
1762
1763 unsigned CmpOpc = 0;
1764 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1765 if (CmpTy == LLT::scalar(32)) {
1766 CmpOpc = AArch64::FCMPSrr;
1767 } else if (CmpTy == LLT::scalar(64)) {
1768 CmpOpc = AArch64::FCMPDrr;
1769 } else {
1770 return false;
1771 }
1772
1773 // FIXME: regbank
1774
1775 AArch64CC::CondCode CC1, CC2;
1776 changeFCMPPredToAArch64CC(
1777 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
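    // Some FP predicates (e.g. 'one', 'ueq') have no single AArch64 condition
    // code; for those CC2 != AL and the final result is formed by ORing the
    // two CSINC results below.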
1778
1779 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1780 .addUse(I.getOperand(2).getReg())
1781 .addUse(I.getOperand(3).getReg());
1782
1783 const unsigned DefReg = I.getOperand(0).getReg();
1784 unsigned Def1Reg = DefReg;
1785 if (CC2 != AArch64CC::AL)
1786 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1787
1788 MachineInstr &CSetMI =
1789 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1790 .addDef(Def1Reg)
1791 .addUse(AArch64::WZR)
1792 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001793 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001794
1795 if (CC2 != AArch64CC::AL) {
1796 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1797 MachineInstr &CSet2MI =
1798 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1799 .addDef(Def2Reg)
1800 .addUse(AArch64::WZR)
1801 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001802 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001803 MachineInstr &OrMI =
1804 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1805 .addDef(DefReg)
1806 .addUse(Def1Reg)
1807 .addUse(Def2Reg);
1808 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1809 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1810 }
1811
1812 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1813 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1814
1815 I.eraseFromParent();
1816 return true;
1817 }
Tim Northovere9600d82017-02-08 17:57:27 +00001818 case TargetOpcode::G_VASTART:
1819 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1820 : selectVaStartAAPCS(I, MF, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001821 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001822 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001823 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001824 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001825 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1826 const unsigned DstReg = I.getOperand(0).getReg();
1827 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1828 const TargetRegisterClass *DstRC =
1829 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1830 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001831 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001832 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001833 case TargetOpcode::G_BLOCK_ADDR: {
1834 if (TM.getCodeModel() == CodeModel::Large) {
1835 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1836 I.eraseFromParent();
1837 return true;
1838 } else {
1839 I.setDesc(TII.get(AArch64::MOVaddrBA));
1840 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1841 I.getOperand(0).getReg())
1842 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1843 /* Offset */ 0, AArch64II::MO_PAGE)
1844 .addBlockAddress(
1845 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1846 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1847 I.eraseFromParent();
1848 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1849 }
1850 }
Jessica Paquette991cb392019-04-23 20:46:19 +00001851 case TargetOpcode::G_INTRINSIC_TRUNC:
1852 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00001853 case TargetOpcode::G_INTRINSIC_ROUND:
1854 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00001855 case TargetOpcode::G_BUILD_VECTOR:
1856 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001857 case TargetOpcode::G_MERGE_VALUES:
1858 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001859 case TargetOpcode::G_UNMERGE_VALUES:
1860 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001861 case TargetOpcode::G_SHUFFLE_VECTOR:
1862 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001863 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1864 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001865 case TargetOpcode::G_INSERT_VECTOR_ELT:
1866 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00001867 case TargetOpcode::G_CONCAT_VECTORS:
1868 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001869 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001870
1871 return false;
1872}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00001873
Jessica Paquette991cb392019-04-23 20:46:19 +00001874bool AArch64InstructionSelector::selectIntrinsicTrunc(
1875 MachineInstr &I, MachineRegisterInfo &MRI) const {
1876 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
1877
1878 // Select the correct opcode.
1879 unsigned Opc = 0;
1880 if (!SrcTy.isVector()) {
1881 switch (SrcTy.getSizeInBits()) {
1882 default:
1883 case 16:
1884 Opc = AArch64::FRINTZHr;
1885 break;
1886 case 32:
1887 Opc = AArch64::FRINTZSr;
1888 break;
1889 case 64:
1890 Opc = AArch64::FRINTZDr;
1891 break;
1892 }
1893 } else {
1894 unsigned NumElts = SrcTy.getNumElements();
1895 switch (SrcTy.getElementType().getSizeInBits()) {
1896 default:
1897 break;
1898 case 16:
1899 if (NumElts == 4)
1900 Opc = AArch64::FRINTZv4f16;
1901 else if (NumElts == 8)
1902 Opc = AArch64::FRINTZv8f16;
1903 break;
1904 case 32:
1905 if (NumElts == 2)
1906 Opc = AArch64::FRINTZv2f32;
1907 else if (NumElts == 4)
1908 Opc = AArch64::FRINTZv4f32;
1909 break;
1910 case 64:
1911 if (NumElts == 2)
1912 Opc = AArch64::FRINTZv2f64;
1913 break;
1914 }
1915 }
1916
1917 if (!Opc) {
1918 // Didn't get an opcode above, bail.
1919 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
1920 return false;
1921 }
1922
1923 // Legalization would have set us up perfectly for this; we just need to
1924 // set the opcode and move on.
1925 I.setDesc(TII.get(Opc));
1926 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1927}
1928
Jessica Paquette4fe75742019-04-23 23:03:03 +00001929bool AArch64InstructionSelector::selectIntrinsicRound(
1930 MachineInstr &I, MachineRegisterInfo &MRI) const {
1931 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
1932
1933 // Select the correct opcode.
1934 unsigned Opc = 0;
1935 if (!SrcTy.isVector()) {
1936 switch (SrcTy.getSizeInBits()) {
1937 default:
1938 case 16:
1939 Opc = AArch64::FRINTAHr;
1940 break;
1941 case 32:
1942 Opc = AArch64::FRINTASr;
1943 break;
1944 case 64:
1945 Opc = AArch64::FRINTADr;
1946 break;
1947 }
1948 } else {
1949 unsigned NumElts = SrcTy.getNumElements();
1950 switch (SrcTy.getElementType().getSizeInBits()) {
1951 default:
1952 break;
1953 case 16:
1954 if (NumElts == 4)
1955 Opc = AArch64::FRINTAv4f16;
1956 else if (NumElts == 8)
1957 Opc = AArch64::FRINTAv8f16;
1958 break;
1959 case 32:
1960 if (NumElts == 2)
1961 Opc = AArch64::FRINTAv2f32;
1962 else if (NumElts == 4)
1963 Opc = AArch64::FRINTAv4f32;
1964 break;
1965 case 64:
1966 if (NumElts == 2)
1967 Opc = AArch64::FRINTAv2f64;
1968 break;
1969 }
1970 }
1971
1972 if (!Opc) {
1973 // Didn't get an opcode above, bail.
1974 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
1975 return false;
1976 }
1977
1978 // Legalization would have set us up perfectly for this; we just need to
1979 // set the opcode and move on.
1980 I.setDesc(TII.get(Opc));
1981 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1982}
1983
Amara Emerson9bf092d2019-04-09 21:22:43 +00001984bool AArch64InstructionSelector::selectVectorICmp(
1985 MachineInstr &I, MachineRegisterInfo &MRI) const {
1986 unsigned DstReg = I.getOperand(0).getReg();
1987 LLT DstTy = MRI.getType(DstReg);
1988 unsigned SrcReg = I.getOperand(2).getReg();
1989 unsigned Src2Reg = I.getOperand(3).getReg();
1990 LLT SrcTy = MRI.getType(SrcReg);
1991
1992 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
1993 unsigned NumElts = DstTy.getNumElements();
1994
1995 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
1996 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
1997 // Third index is cc opcode:
1998 // 0 == eq
1999 // 1 == ugt
2000 // 2 == uge
2001 // 3 == ult
2002 // 4 == ule
2003 // 5 == sgt
2004 // 6 == sge
2005 // 7 == slt
2006 // 8 == sle
2007 // ne is done by negating 'eq' result.
2008
2009 // The table below assumes that for some comparisons the operands will be
2010 // commuted.
2011 // ult op == commute + ugt op
2012 // ule op == commute + uge op
2013 // slt op == commute + sgt op
2014 // sle op == commute + sge op
2015 unsigned PredIdx = 0;
2016 bool SwapOperands = false;
2017 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2018 switch (Pred) {
2019 case CmpInst::ICMP_NE:
2020 case CmpInst::ICMP_EQ:
2021 PredIdx = 0;
2022 break;
2023 case CmpInst::ICMP_UGT:
2024 PredIdx = 1;
2025 break;
2026 case CmpInst::ICMP_UGE:
2027 PredIdx = 2;
2028 break;
2029 case CmpInst::ICMP_ULT:
2030 PredIdx = 3;
2031 SwapOperands = true;
2032 break;
2033 case CmpInst::ICMP_ULE:
2034 PredIdx = 4;
2035 SwapOperands = true;
2036 break;
2037 case CmpInst::ICMP_SGT:
2038 PredIdx = 5;
2039 break;
2040 case CmpInst::ICMP_SGE:
2041 PredIdx = 6;
2042 break;
2043 case CmpInst::ICMP_SLT:
2044 PredIdx = 7;
2045 SwapOperands = true;
2046 break;
2047 case CmpInst::ICMP_SLE:
2048 PredIdx = 8;
2049 SwapOperands = true;
2050 break;
2051 default:
2052 llvm_unreachable("Unhandled icmp predicate");
2053 return false;
2054 }
2055
2056 // This table obviously should be tablegen'd when we have our GISel native
2057 // tablegen selector.
2058
2059 static const unsigned OpcTable[4][4][9] = {
2060 {
2061 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2062 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2063 0 /* invalid */},
2064 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2065 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2066 0 /* invalid */},
2067 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2068 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2069 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2070 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2071 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2072 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2073 },
2074 {
2075 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2076 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2077 0 /* invalid */},
2078 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2079 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2080 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2081 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2082 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2083 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2084 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2085 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2086 0 /* invalid */}
2087 },
2088 {
2089 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2090 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2091 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2092 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2093 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2094 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2095 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2096 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2097 0 /* invalid */},
2098 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2099 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2100 0 /* invalid */}
2101 },
2102 {
2103 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2104 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2105 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2106 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2107 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2108 0 /* invalid */},
2109 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2110 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2111 0 /* invalid */},
2112 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2113 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2114 0 /* invalid */}
2115 },
2116 };
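  // For example, a v4i32 compare has SrcEltSize == 32 (EltIdx == 2) and
  // NumElts == 4 (NumEltsIdx == 1), which selects the CMEQv4i32 row above.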
2117 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2118 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2119 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2120 if (!Opc) {
2121 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2122 return false;
2123 }
2124
2125 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2126 const TargetRegisterClass *SrcRC =
2127 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2128 if (!SrcRC) {
2129 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2130 return false;
2131 }
2132
2133 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2134 if (SrcTy.getSizeInBits() == 128)
2135 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2136
2137 if (SwapOperands)
2138 std::swap(SrcReg, Src2Reg);
2139
2140 MachineIRBuilder MIB(I);
2141 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2142 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2143
2144 // Invert if we had a 'ne' cc.
2145 if (NotOpc) {
2146 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2147 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2148 } else {
2149 MIB.buildCopy(DstReg, Cmp.getReg(0));
2150 }
2151 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2152 I.eraseFromParent();
2153 return true;
2154}
2155
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002156MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00002157 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002158 MachineIRBuilder &MIRBuilder) const {
2159 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002160
2161 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002162 auto Ins =
2163 MIRBuilder
2164 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2165 .addImm(SubregIndex);
2166 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2167 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2168 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002169 };
2170
Amara Emerson8acb0d92019-03-04 19:16:00 +00002171 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002172 case 16:
2173 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002174 case 32:
2175 return BuildFn(AArch64::ssub);
2176 case 64:
2177 return BuildFn(AArch64::dsub);
2178 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002179 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002180 }
2181}
2182
Amara Emerson8cb186c2018-12-20 01:11:04 +00002183bool AArch64InstructionSelector::selectMergeValues(
2184 MachineInstr &I, MachineRegisterInfo &MRI) const {
2185 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2186 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2187 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2188 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2189
2190 // At the moment we only support merging two s32s into an s64.
2191 if (I.getNumOperands() != 3)
2192 return false;
2193 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2194 return false;
2195 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2196 if (RB.getID() != AArch64::GPRRegBankID)
2197 return false;
2198
2199 auto *DstRC = &AArch64::GPR64RegClass;
2200 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
2201 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2202 TII.get(TargetOpcode::SUBREG_TO_REG))
2203 .addDef(SubToRegDef)
2204 .addImm(0)
2205 .addUse(I.getOperand(1).getReg())
2206 .addImm(AArch64::sub_32);
2207 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
2208 // Need to anyext the second scalar before we can use bfm
2209 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2210 TII.get(TargetOpcode::SUBREG_TO_REG))
2211 .addDef(SubToRegDef2)
2212 .addImm(0)
2213 .addUse(I.getOperand(2).getReg())
2214 .addImm(AArch64::sub_32);
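  // BFMXri with immr = 32 and imms = 31 (the BFI #32, #32 form) copies the
  // low 32 bits of the anyext'd second operand into bits [63:32] of the
  // result, leaving the first operand's low 32 bits in place.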
Amara Emerson8cb186c2018-12-20 01:11:04 +00002215 MachineInstr &BFM =
2216 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002217 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002218 .addUse(SubToRegDef)
2219 .addUse(SubToRegDef2)
2220 .addImm(32)
2221 .addImm(31);
2222 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2223 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2224 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2225 I.eraseFromParent();
2226 return true;
2227}
2228
Jessica Paquette607774c2019-03-11 22:18:01 +00002229static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2230 const unsigned EltSize) {
2231 // Choose a lane copy opcode and subregister based off of the size of the
2232 // vector's elements.
2233 switch (EltSize) {
2234 case 16:
2235 CopyOpc = AArch64::CPYi16;
2236 ExtractSubReg = AArch64::hsub;
2237 break;
2238 case 32:
2239 CopyOpc = AArch64::CPYi32;
2240 ExtractSubReg = AArch64::ssub;
2241 break;
2242 case 64:
2243 CopyOpc = AArch64::CPYi64;
2244 ExtractSubReg = AArch64::dsub;
2245 break;
2246 default:
2247 // Unknown size, bail out.
2248 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2249 return false;
2250 }
2251 return true;
2252}
2253
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002254/// Given a register \p Reg, find the value of a constant defining \p Reg.
2255/// Return true if one could be found, and store it in \p Val. Return false
2256/// otherwise.
2257static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
2258 unsigned &Val) {
2259 // Look at the def of the register.
2260 MachineInstr *Def = MRI.getVRegDef(Reg);
2261 if (!Def)
2262 return false;
2263
2264 // Find the first definition which isn't a copy.
2265 if (Def->isCopy()) {
2266 Reg = Def->getOperand(1).getReg();
2267 auto It = find_if_not(MRI.reg_nodbg_instructions(Reg),
2268 [](const MachineInstr &MI) { return MI.isCopy(); });
2269 if (It == MRI.reg_instr_nodbg_end()) {
2270 LLVM_DEBUG(dbgs() << "Couldn't find non-copy def for register\n");
2271 return false;
2272 }
2273 Def = &*It;
2274 }
2275
2276 // TODO: Handle opcodes other than G_CONSTANT.
2277 if (Def->getOpcode() != TargetOpcode::G_CONSTANT) {
2278 LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
2279 "currently unsupported.\n");
2280 return false;
2281 }
2282
2283 // Return the constant value associated with the operand.
2284 Val = Def->getOperand(1).getCImm()->getLimitedValue();
2285 return true;
2286}
2287
Amara Emersond61b89b2019-03-14 22:48:18 +00002288MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2289 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2290 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2291 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2292 unsigned CopyOpc = 0;
2293 unsigned ExtractSubReg = 0;
2294 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2295 LLVM_DEBUG(
2296 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2297 return nullptr;
2298 }
2299
2300 const TargetRegisterClass *DstRC =
2301 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2302 if (!DstRC) {
2303 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2304 return nullptr;
2305 }
2306
2307 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2308 const LLT &VecTy = MRI.getType(VecReg);
2309 const TargetRegisterClass *VecRC =
2310 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2311 if (!VecRC) {
2312 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2313 return nullptr;
2314 }
2315
2316 // The register that we're going to copy into.
2317 unsigned InsertReg = VecReg;
2318 if (!DstReg)
2319 DstReg = MRI.createVirtualRegister(DstRC);
2320 // If the lane index is 0, we just use a subregister COPY.
2321 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002322 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2323 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002324 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002325 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002326 }
2327
2328 // Lane copies require 128-bit wide registers. If we're dealing with an
2329 // unpacked vector, then we need to move up to that width. Insert an implicit
2330 // def and a subregister insert to get us there.
2331 if (VecTy.getSizeInBits() != 128) {
2332 MachineInstr *ScalarToVector = emitScalarToVector(
2333 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2334 if (!ScalarToVector)
2335 return nullptr;
2336 InsertReg = ScalarToVector->getOperand(0).getReg();
2337 }
2338
2339 MachineInstr *LaneCopyMI =
2340 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2341 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2342
2343 // Make sure that we actually constrain the initial copy.
2344 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2345 return LaneCopyMI;
2346}
2347
Jessica Paquette607774c2019-03-11 22:18:01 +00002348bool AArch64InstructionSelector::selectExtractElt(
2349 MachineInstr &I, MachineRegisterInfo &MRI) const {
2350 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2351 "unexpected opcode!");
2352 unsigned DstReg = I.getOperand(0).getReg();
2353 const LLT NarrowTy = MRI.getType(DstReg);
2354 const unsigned SrcReg = I.getOperand(1).getReg();
2355 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002356 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002357 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2358 "source register size too small!");
2359 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2360
2361 // Need the lane index to determine the correct copy opcode.
2362 MachineOperand &LaneIdxOp = I.getOperand(2);
2363 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2364
2365 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2366 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2367 return false;
2368 }
2369
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002370 // Find the index to extract from.
2371 unsigned LaneIdx = 0;
2372 if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
Jessica Paquette607774c2019-03-11 22:18:01 +00002373 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002374
Jessica Paquette607774c2019-03-11 22:18:01 +00002375 MachineIRBuilder MIRBuilder(I);
2376
Amara Emersond61b89b2019-03-14 22:48:18 +00002377 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2378 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2379 LaneIdx, MIRBuilder);
2380 if (!Extract)
2381 return false;
2382
2383 I.eraseFromParent();
2384 return true;
2385}
2386
2387bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2388 MachineInstr &I, MachineRegisterInfo &MRI) const {
2389 unsigned NumElts = I.getNumOperands() - 1;
2390 unsigned SrcReg = I.getOperand(NumElts).getReg();
2391 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2392 const LLT SrcTy = MRI.getType(SrcReg);
2393
2394 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2395 if (SrcTy.getSizeInBits() > 128) {
2396 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2397 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002398 }
2399
Amara Emersond61b89b2019-03-14 22:48:18 +00002400 MachineIRBuilder MIB(I);
2401
2402 // We implement a split vector operation by treating the sub-vectors as
2403 // scalars and extracting them.
2404 const RegisterBank &DstRB =
2405 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2406 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2407 unsigned Dst = I.getOperand(OpIdx).getReg();
2408 MachineInstr *Extract =
2409 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2410 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002411 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002412 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002413 I.eraseFromParent();
2414 return true;
2415}
2416
Jessica Paquette245047d2019-01-24 22:00:41 +00002417bool AArch64InstructionSelector::selectUnmergeValues(
2418 MachineInstr &I, MachineRegisterInfo &MRI) const {
2419 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2420 "unexpected opcode");
2421
2422 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2423 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2424 AArch64::FPRRegBankID ||
2425 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2426 AArch64::FPRRegBankID) {
2427 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2428 "currently unsupported.\n");
2429 return false;
2430 }
2431
2432 // The last operand is the vector source register, and every other operand is
2433 // a register to unpack into.
2434 unsigned NumElts = I.getNumOperands() - 1;
2435 unsigned SrcReg = I.getOperand(NumElts).getReg();
2436 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2437 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002438 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002439 assert(WideTy.isVector() && "can only unmerge from vector types!");
2440 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2441 "source register size too small!");
2442
Amara Emersond61b89b2019-03-14 22:48:18 +00002443 if (!NarrowTy.isScalar())
2444 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002445
Amara Emerson3739a202019-03-15 21:59:50 +00002446 MachineIRBuilder MIB(I);
2447
Jessica Paquette245047d2019-01-24 22:00:41 +00002448 // Choose a lane copy opcode and subregister based off of the size of the
2449 // vector's elements.
2450 unsigned CopyOpc = 0;
2451 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002452 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002453 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002454
2455 // Set up for the lane copies.
2456 MachineBasicBlock &MBB = *I.getParent();
2457
2458 // Stores the registers we'll be copying from.
2459 SmallVector<unsigned, 4> InsertRegs;
2460
2461 // We'll use the first register twice, so we only need NumElts-1 registers.
2462 unsigned NumInsertRegs = NumElts - 1;
2463
2464 // If our elements fit into exactly 128 bits, then we can copy from the source
2465 // directly. Otherwise, we need to do a bit of setup with some subregister
2466 // inserts.
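  // For example, unmerging a v4s32 into four s32s copies lane 0 out with an
  // ssub subregister copy and lanes 1-3 with CPYi32, reading straight from
  // SrcReg since 4 x 32 bits already fills a 128-bit register.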
2467 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2468 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2469 } else {
2470 // No. We have to perform subregister inserts. For each insert, create an
2471 // implicit def and a subregister insert, and save the register we create.
2472 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2473 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2474 MachineInstr &ImpDefMI =
2475 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2476 ImpDefReg);
2477
2478 // Now, create the subregister insert from SrcReg.
2479 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2480 MachineInstr &InsMI =
2481 *BuildMI(MBB, I, I.getDebugLoc(),
2482 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2483 .addUse(ImpDefReg)
2484 .addUse(SrcReg)
2485 .addImm(AArch64::dsub);
2486
2487 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2488 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2489
2490 // Save the register so that we can copy from it after.
2491 InsertRegs.push_back(InsertReg);
2492 }
2493 }
2494
2495 // Now that we've created any necessary subregister inserts, we can
2496 // create the copies.
2497 //
2498 // Perform the first copy separately as a subregister copy.
2499 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002500 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2501 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002502 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002503
2504 // Now, perform the remaining copies as vector lane copies.
2505 unsigned LaneIdx = 1;
2506 for (unsigned InsReg : InsertRegs) {
2507 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2508 MachineInstr &CopyInst =
2509 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2510 .addUse(InsReg)
2511 .addImm(LaneIdx);
2512 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2513 ++LaneIdx;
2514 }
2515
2516 // Separately constrain the first copy's destination. Because of the
2517 // limitation in constrainOperandRegClass, we can't guarantee that this will
2518 // actually be constrained. So, do it ourselves using the second operand.
2519 const TargetRegisterClass *RC =
2520 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2521 if (!RC) {
2522 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2523 return false;
2524 }
2525
2526 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2527 I.eraseFromParent();
2528 return true;
2529}
2530
Amara Emerson2ff22982019-03-14 22:48:15 +00002531bool AArch64InstructionSelector::selectConcatVectors(
2532 MachineInstr &I, MachineRegisterInfo &MRI) const {
2533 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2534 "Unexpected opcode");
2535 unsigned Dst = I.getOperand(0).getReg();
2536 unsigned Op1 = I.getOperand(1).getReg();
2537 unsigned Op2 = I.getOperand(2).getReg();
2538 MachineIRBuilder MIRBuilder(I);
2539 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2540 if (!ConcatMI)
2541 return false;
2542 I.eraseFromParent();
2543 return true;
2544}
2545
Amara Emerson1abe05c2019-02-21 20:20:16 +00002546void AArch64InstructionSelector::collectShuffleMaskIndices(
2547 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00002548 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00002549 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2550 assert(
2551 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2552 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2553 // Find the constant indices.
2554 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2555 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2556 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2557 // Look through copies.
2558 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2559 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2560 assert(ScalarDef && "Could not find def of copy operand");
2561 }
Amara Emerson2806fd02019-04-12 21:31:21 +00002562 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2563 // This must be an undef if it is not a constant.
2564 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2565 Idxs.push_back(None);
2566 } else {
2567 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2568 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002569 }
2570}
2571
2572unsigned
2573AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2574 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00002575 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00002576 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2577 if (Align == 0)
2578 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2579
2580 MachineConstantPool *MCP = MF.getConstantPool();
2581 return MCP->getConstantPoolIndex(CPVal, Align);
2582}
2583
2584MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2585 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2586 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2587
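  // The constant-pool access uses the usual ADRP + :lo12: pattern: ADRP
  // materializes the 4 KiB page of the constant-pool entry, and the load
  // below adds the low 12 bits of the offset (MO_PAGEOFF | MO_NC).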
2588 auto Adrp =
2589 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2590 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002591
2592 MachineInstr *LoadMI = nullptr;
2593 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2594 case 16:
2595 LoadMI =
2596 &*MIRBuilder
2597 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2598 .addConstantPoolIndex(CPIdx, 0,
2599 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2600 break;
2601 case 8:
2602 LoadMI = &*MIRBuilder
2603 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2604 .addConstantPoolIndex(
2605 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2606 break;
2607 default:
2608 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2609 << *CPVal->getType());
2610 return nullptr;
2611 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002612 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002613 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2614 return LoadMI;
2615}
2616
2617/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
 2618/// size and register bank.
2619static std::pair<unsigned, unsigned>
2620getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2621 unsigned Opc, SubregIdx;
2622 if (RB.getID() == AArch64::GPRRegBankID) {
2623 if (EltSize == 32) {
2624 Opc = AArch64::INSvi32gpr;
2625 SubregIdx = AArch64::ssub;
2626 } else if (EltSize == 64) {
2627 Opc = AArch64::INSvi64gpr;
2628 SubregIdx = AArch64::dsub;
2629 } else {
2630 llvm_unreachable("invalid elt size!");
2631 }
2632 } else {
2633 if (EltSize == 8) {
2634 Opc = AArch64::INSvi8lane;
2635 SubregIdx = AArch64::bsub;
2636 } else if (EltSize == 16) {
2637 Opc = AArch64::INSvi16lane;
2638 SubregIdx = AArch64::hsub;
2639 } else if (EltSize == 32) {
2640 Opc = AArch64::INSvi32lane;
2641 SubregIdx = AArch64::ssub;
2642 } else if (EltSize == 64) {
2643 Opc = AArch64::INSvi64lane;
2644 SubregIdx = AArch64::dsub;
2645 } else {
2646 llvm_unreachable("invalid elt size!");
2647 }
2648 }
2649 return std::make_pair(Opc, SubregIdx);
2650}
2651
2652MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002653 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2654 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002655 // We implement a vector concat by:
2656 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2657 // 2. Insert the upper vector into the destination's upper element
2658 // TODO: some of this code is common with G_BUILD_VECTOR handling.
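  // E.g. concatenating two <2 x s32> operands into a <4 x s32>: each 64-bit
  // source is first placed in the low half of a 128-bit register, then an
  // INSvi64lane (mov v0.d[1], v1.d[0]) moves the second source into the high
  // half of the result.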
2659 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2660
2661 const LLT Op1Ty = MRI.getType(Op1);
2662 const LLT Op2Ty = MRI.getType(Op2);
2663
2664 if (Op1Ty != Op2Ty) {
2665 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2666 return nullptr;
2667 }
2668 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2669
2670 if (Op1Ty.getSizeInBits() >= 128) {
2671 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2672 return nullptr;
2673 }
2674
2675 // At the moment we just support 64 bit vector concats.
2676 if (Op1Ty.getSizeInBits() != 64) {
2677    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2678 return nullptr;
2679 }
2680
2681 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2682 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2683 const TargetRegisterClass *DstRC =
2684 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2685
2686 MachineInstr *WidenedOp1 =
2687 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2688 MachineInstr *WidenedOp2 =
2689 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2690 if (!WidenedOp1 || !WidenedOp2) {
2691 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2692 return nullptr;
2693 }
2694
2695 // Now do the insert of the upper element.
2696 unsigned InsertOpc, InsSubRegIdx;
2697 std::tie(InsertOpc, InsSubRegIdx) =
2698 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2699
Amara Emerson2ff22982019-03-14 22:48:15 +00002700 if (!Dst)
2701 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002702 auto InsElt =
2703 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002704 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002705 .addImm(1) /* Lane index */
2706 .addUse(WidenedOp2->getOperand(0).getReg())
2707 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002708 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2709 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002710}
2711
Amara Emerson761ca2e2019-03-19 21:43:05 +00002712bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2713 // Try to match a vector splat operation into a dup instruction.
2714 // We're looking for this pattern:
2715 // %scalar:gpr(s64) = COPY $x0
2716 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2717 // %cst0:gpr(s32) = G_CONSTANT i32 0
2718 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2719 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2720 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2721 // %zerovec(<2 x s32>)
2722 //
2723 // ...into:
2724 // %splat = DUP %scalar
2725 // We use the regbank of the scalar to determine which kind of dup to use.
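  // E.g. a GPR s64 scalar selects to DUPv2i64gpr (dup v0.2d, x0), while an FPR
  // scalar is first widened to a Q register and then selects to DUPv2i64lane
  // (dup v0.2d, v1.d[0]).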
2726 MachineIRBuilder MIB(I);
2727 MachineRegisterInfo &MRI = *MIB.getMRI();
2728 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2729 using namespace TargetOpcode;
2730 using namespace MIPatternMatch;
2731
2732 // Begin matching the insert.
2733 auto *InsMI =
2734 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2735 if (!InsMI)
2736 return false;
2737 // Match the undef vector operand.
2738 auto *UndefMI =
2739 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2740 if (!UndefMI)
2741 return false;
2742 // Match the scalar being splatted.
2743 unsigned ScalarReg = InsMI->getOperand(2).getReg();
2744 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2745 // Match the index constant 0.
2746 int64_t Index = 0;
2747 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2748 return false;
2749
2750 // The shuffle's second operand doesn't matter if the mask is all zero.
2751 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2752 if (!ZeroVec)
2753 return false;
2754 int64_t Zero = 0;
2755 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2756 return false;
2757  for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
2758 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2759 return false; // This wasn't an all zeros vector.
2760 }
2761
2762 // We're done, now find out what kind of splat we need.
2763 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2764 LLT EltTy = VecTy.getElementType();
2765 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2766    LLVM_DEBUG(dbgs() << "Could not optimize splat: need a 128b vector with >= 32b elements");
2767 return false;
2768 }
2769 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2770 static const unsigned OpcTable[2][2] = {
2771 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2772 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2773 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2774
2775 // For FP splats, we need to widen the scalar reg via undef too.
2776 if (IsFP) {
2777 MachineInstr *Widen = emitScalarToVector(
2778 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2779 if (!Widen)
2780 return false;
2781 ScalarReg = Widen->getOperand(0).getReg();
2782 }
2783 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
2784 if (IsFP)
2785 Dup.addImm(0);
2786 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
2787 I.eraseFromParent();
2788 return true;
2789}
2790
2791bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
2792 if (TM.getOptLevel() == CodeGenOpt::None)
2793 return false;
2794 if (tryOptVectorDup(I))
2795 return true;
2796 return false;
2797}
2798
Amara Emerson1abe05c2019-02-21 20:20:16 +00002799bool AArch64InstructionSelector::selectShuffleVector(
2800 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00002801 if (tryOptVectorShuffle(I))
2802 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002803 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2804 unsigned Src1Reg = I.getOperand(1).getReg();
2805 const LLT Src1Ty = MRI.getType(Src1Reg);
2806 unsigned Src2Reg = I.getOperand(2).getReg();
2807 const LLT Src2Ty = MRI.getType(Src2Reg);
2808
2809 MachineBasicBlock &MBB = *I.getParent();
2810 MachineFunction &MF = *MBB.getParent();
2811 LLVMContext &Ctx = MF.getFunction().getContext();
2812
2813 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2814  // operand; it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00002815 // find the mask indices. If the mask element is undef, then
2816 // collectShuffleMaskIndices() will add a None entry for that index into
2817 // the list.
2818 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002819 collectShuffleMaskIndices(I, MRI, Mask);
2820 assert(!Mask.empty() && "Expected to find mask indices");
2821
2822 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2823  // it originated from a <1 x T> type. Those should have been lowered into
2824 // G_BUILD_VECTOR earlier.
2825 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2826 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2827 return false;
2828 }
2829
2830 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2831
2832 SmallVector<Constant *, 64> CstIdxs;
Amara Emerson2806fd02019-04-12 21:31:21 +00002833 for (auto &MaybeVal : Mask) {
2834    // For now, we'll just assume any undef indexes are 0. This should be
 2835    // optimized in the future, e.g. to select DUP etc.
2836 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002837 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2838 unsigned Offset = Byte + Val * BytesPerElt;
2839 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2840 }
2841 }
2842
Amara Emerson8acb0d92019-03-04 19:16:00 +00002843 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002844
2845 // Use a constant pool to load the index vector for TBL.
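  // E.g. for a <4 x s32> shuffle with mask <0, 4, 1, 5>, BytesPerElt is 4 and
  // the index vector built above is <0..3, 16..19, 4..7, 20..23>, picking
  // bytes out of the 32-byte table formed by the two source registers.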
2846 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002847 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2848 if (!IndexLoad) {
2849 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
2850 return false;
2851 }
2852
Amara Emerson8acb0d92019-03-04 19:16:00 +00002853 if (DstTy.getSizeInBits() != 128) {
2854 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2855 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00002856 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002857 if (!Concat) {
2858 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
2859 return false;
2860 }
2861
2862    // The constant pool load will be 64 bits, so we need to widen it to an FPR128 reg.
2863 IndexLoad =
2864 emitScalarToVector(64, &AArch64::FPR128RegClass,
2865 IndexLoad->getOperand(0).getReg(), MIRBuilder);
2866
2867 auto TBL1 = MIRBuilder.buildInstr(
2868 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2869 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2870 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2871
Amara Emerson3739a202019-03-15 21:59:50 +00002872 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00002873 MIRBuilder
2874 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2875 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002876 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2877 I.eraseFromParent();
2878 return true;
2879 }
2880
Amara Emerson1abe05c2019-02-21 20:20:16 +00002881 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2882 // Q registers for regalloc.
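  // The end result is roughly "tbl v0.16b, { v1.16b, v2.16b }, vIdx.16b",
  // where the two table registers must be consecutive and vIdx is the byte
  // index vector loaded from the constant pool above.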
2883 auto RegSeq = MIRBuilder
2884 .buildInstr(TargetOpcode::REG_SEQUENCE,
2885 {&AArch64::QQRegClass}, {Src1Reg})
2886 .addImm(AArch64::qsub0)
2887 .addUse(Src2Reg)
2888 .addImm(AArch64::qsub1);
2889
2890 auto TBL2 =
2891 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2892 {RegSeq, IndexLoad->getOperand(0).getReg()});
2893 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2894 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2895 I.eraseFromParent();
2896 return true;
2897}
2898
Jessica Paquette16d67a32019-03-13 23:22:23 +00002899MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2900 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2901 unsigned LaneIdx, const RegisterBank &RB,
2902 MachineIRBuilder &MIRBuilder) const {
2903 MachineInstr *InsElt = nullptr;
2904 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2905 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2906
2907 // Create a register to define with the insert if one wasn't passed in.
2908 if (!DstReg)
2909 DstReg = MRI.createVirtualRegister(DstRC);
2910
2911 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2912 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2913
2914 if (RB.getID() == AArch64::FPRRegBankID) {
2915 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2916 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2917 .addImm(LaneIdx)
2918 .addUse(InsSub->getOperand(0).getReg())
2919 .addImm(0);
2920 } else {
2921 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2922 .addImm(LaneIdx)
2923 .addUse(EltReg);
2924 }
2925
2926 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2927 return InsElt;
2928}
2929
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002930bool AArch64InstructionSelector::selectInsertElt(
2931 MachineInstr &I, MachineRegisterInfo &MRI) const {
2932 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
2933
2934 // Get information on the destination.
2935 unsigned DstReg = I.getOperand(0).getReg();
2936 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002937 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002938
2939 // Get information on the element we want to insert into the destination.
2940 unsigned EltReg = I.getOperand(2).getReg();
2941 const LLT EltTy = MRI.getType(EltReg);
2942 unsigned EltSize = EltTy.getSizeInBits();
2943 if (EltSize < 16 || EltSize > 64)
2944 return false; // Don't support all element types yet.
2945
2946 // Find the definition of the index. Bail out if it's not defined by a
2947 // G_CONSTANT.
2948 unsigned IdxReg = I.getOperand(3).getReg();
2949 unsigned LaneIdx = 0;
2950 if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
2951 return false;
2952
2953 // Perform the lane insert.
2954 unsigned SrcReg = I.getOperand(1).getReg();
2955 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
2956 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002957
2958 if (VecSize < 128) {
2959 // If the vector we're inserting into is smaller than 128 bits, widen it
2960 // to 128 to do the insert.
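    // E.g. for a <2 x s32> destination: the 64-bit source vector goes into the
    // low half of a Q register, the INS is performed on the Q register, and
    // the result is copied back out of the dsub subregister further down.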
2961 MachineInstr *ScalarToVec = emitScalarToVector(
2962 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
2963 if (!ScalarToVec)
2964 return false;
2965 SrcReg = ScalarToVec->getOperand(0).getReg();
2966 }
2967
2968 // Create an insert into a new FPR128 register.
2969 // Note that if our vector is already 128 bits, we end up emitting an extra
2970 // register.
2971 MachineInstr *InsMI =
2972 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
2973
2974 if (VecSize < 128) {
2975 // If we had to widen to perform the insert, then we have to demote back to
2976 // the original size to get the result we want.
2977 unsigned DemoteVec = InsMI->getOperand(0).getReg();
2978 const TargetRegisterClass *RC =
2979 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
2980 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2981 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2982 return false;
2983 }
2984 unsigned SubReg = 0;
2985 if (!getSubRegForClass(RC, TRI, SubReg))
2986 return false;
2987 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2988 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
2989                        << ")\n");
2990 return false;
2991 }
2992 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2993 .addReg(DemoteVec, 0, SubReg);
2994 RBI.constrainGenericRegister(DstReg, *RC, MRI);
2995 } else {
2996 // No widening needed.
2997 InsMI->getOperand(0).setReg(DstReg);
2998 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2999 }
3000
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003001 I.eraseFromParent();
3002 return true;
3003}
3004
Amara Emerson5ec14602018-12-10 18:44:58 +00003005bool AArch64InstructionSelector::selectBuildVector(
3006 MachineInstr &I, MachineRegisterInfo &MRI) const {
3007 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3008 // Until we port more of the optimized selections, for now just use a vector
3009 // insert sequence.
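  // Roughly: the first element is moved into a 128-bit register, every later
  // element is inserted with an INSvi* instruction, and sub-128-bit results
  // are extracted again with a subregister copy at the end.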
3010 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3011 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3012 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003013 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003014 return false; // Don't support all element types yet.
3015 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003016 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003017
3018 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003019 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003020 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3021 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003022 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003023 return false;
3024
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003025 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003026 unsigned DstSize = DstTy.getSizeInBits();
3027
3028 // Keep track of the last MI we inserted. Later on, we might be able to save
3029 // a copy using it.
3030 MachineInstr *PrevMI = nullptr;
3031 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003032 // Note that if we don't do a subregister copy, we can end up making an
3033 // extra register.
3034 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3035 MIRBuilder);
3036 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003037 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003038
3039 // If DstTy's size in bits is less than 128, then emit a subregister copy
3040 // from DstVec to the last register we've defined.
3041 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003042 // Force this to be FPR using the destination vector.
3043 const TargetRegisterClass *RC =
3044 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003045 if (!RC)
3046 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003047 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3048 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3049 return false;
3050 }
3051
3052 unsigned SubReg = 0;
3053 if (!getSubRegForClass(RC, TRI, SubReg))
3054 return false;
3055 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3056 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3057                        << ")\n");
3058 return false;
3059 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003060
3061 unsigned Reg = MRI.createVirtualRegister(RC);
3062 unsigned DstReg = I.getOperand(0).getReg();
3063
Amara Emerson86271782019-03-18 19:20:10 +00003064 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3065 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003066 MachineOperand &RegOp = I.getOperand(1);
3067 RegOp.setReg(Reg);
3068 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3069 } else {
3070 // We don't need a subregister copy. Save a copy by re-using the
3071 // destination register on the final insert.
3072 assert(PrevMI && "PrevMI was null?");
3073 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3074 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3075 }
3076
Amara Emerson5ec14602018-12-10 18:44:58 +00003077 I.eraseFromParent();
3078 return true;
3079}
3080
Jessica Paquette22c62152019-04-02 19:57:26 +00003081/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3082/// intrinsic.
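/// E.g. an 8-byte store maps to STLXRX ("stlxr wStatus, xVal, [xAddr]");
/// narrower store sizes are not handled yet.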
3083static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3084 switch (NumBytesToStore) {
3085 // TODO: 1, 2, and 4 byte stores.
3086 case 8:
3087 return AArch64::STLXRX;
3088 default:
3089 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3090 << NumBytesToStore << ")\n");
3091 break;
3092 }
3093 return 0;
3094}
3095
3096bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3097 MachineInstr &I, MachineRegisterInfo &MRI) const {
3098 // Find the intrinsic ID.
3099 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3100 return Op.isIntrinsicID();
3101 });
3102 if (IntrinOp == I.operands_end())
3103 return false;
3104 unsigned IntrinID = IntrinOp->getIntrinsicID();
3105 MachineIRBuilder MIRBuilder(I);
3106
3107 // Select the instruction.
3108 switch (IntrinID) {
3109 default:
3110 return false;
3111 case Intrinsic::trap:
3112 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3113 break;
3114  case Intrinsic::aarch64_stlxr: {
3115 unsigned StatReg = I.getOperand(0).getReg();
3116 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3117 "Status register must be 32 bits!");
3118 unsigned SrcReg = I.getOperand(2).getReg();
3119
3120 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3121 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3122 return false;
3123 }
3124
3125 unsigned PtrReg = I.getOperand(3).getReg();
3126 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3127
3128 // Expect only one memory operand.
3129 if (!I.hasOneMemOperand())
3130 return false;
3131
3132 const MachineMemOperand *MemOp = *I.memoperands_begin();
3133 unsigned NumBytesToStore = MemOp->getSize();
3134 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3135 if (!Opc)
3136 return false;
3137
3138 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3139    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
    break;
  }
 3140  }
3141
3142 I.eraseFromParent();
3143 return true;
3144}
3145
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003146/// selectArithImmed - Select an immediate value that can be represented as
 3147/// a 12-bit value shifted left by either 0 or 12. If so, return the renderer
 3148/// functions for the 12-bit value and the shifter operand; otherwise None.
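/// E.g. 0xabc yields (Val=0xabc, Shift=0) and 0xabc000 yields (Val=0xabc,
/// Shift=12), while something like 0xabc00 cannot be encoded this way.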
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003149InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00003150AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003151 MachineInstr &MI = *Root.getParent();
3152 MachineBasicBlock &MBB = *MI.getParent();
3153 MachineFunction &MF = *MBB.getParent();
3154 MachineRegisterInfo &MRI = MF.getRegInfo();
3155
3156 // This function is called from the addsub_shifted_imm ComplexPattern,
3157  // which lists [imm] as the list of opcodes it's interested in; however,
3158 // we still need to check whether the operand is actually an immediate
3159 // here because the ComplexPattern opcode list is only used in
3160 // root-level opcode matching.
3161 uint64_t Immed;
3162 if (Root.isImm())
3163 Immed = Root.getImm();
3164 else if (Root.isCImm())
3165 Immed = Root.getCImm()->getZExtValue();
3166 else if (Root.isReg()) {
3167 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3168 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003169 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003170 MachineOperand &Op1 = Def->getOperand(1);
3171 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003172 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003173 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003174 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003175 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003176
3177 unsigned ShiftAmt;
3178
3179 if (Immed >> 12 == 0) {
3180 ShiftAmt = 0;
3181 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3182 ShiftAmt = 12;
3183 Immed = Immed >> 12;
3184 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003185 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003186
3187 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003188 return {{
3189 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3190 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3191 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003192}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003193
Daniel Sandersea8711b2017-10-16 03:36:29 +00003194/// Select a "register plus unscaled signed 9-bit immediate" address. This
3195/// should only match when there is an offset that is not valid for a scaled
3196/// immediate addressing mode. The "Size" argument is the size in bytes of the
3197/// memory reference, which is needed here to know what is valid for a scaled
3198/// immediate.
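/// E.g. with Size == 4, an offset of 17 is not a multiple of 4 so it can't use
/// the scaled LDR/STR form, but it fits in [-256, 255] and is matched here for
/// the unscaled LDUR/STUR form instead.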
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003199InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003200AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3201 unsigned Size) const {
3202 MachineRegisterInfo &MRI =
3203 Root.getParent()->getParent()->getParent()->getRegInfo();
3204
3205 if (!Root.isReg())
3206 return None;
3207
3208 if (!isBaseWithConstantOffset(Root, MRI))
3209 return None;
3210
3211 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3212 if (!RootDef)
3213 return None;
3214
3215 MachineOperand &OffImm = RootDef->getOperand(2);
3216 if (!OffImm.isReg())
3217 return None;
3218 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3219 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3220 return None;
3221 int64_t RHSC;
3222 MachineOperand &RHSOp1 = RHS->getOperand(1);
3223 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3224 return None;
3225 RHSC = RHSOp1.getCImm()->getSExtValue();
3226
3227 // If the offset is valid as a scaled immediate, don't match here.
3228 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3229 return None;
3230 if (RHSC >= -256 && RHSC < 256) {
3231 MachineOperand &Base = RootDef->getOperand(1);
3232 return {{
3233 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3234 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3235 }};
3236 }
3237 return None;
3238}
3239
3240/// Select a "register plus scaled unsigned 12-bit immediate" address. The
3241/// "Size" argument is the size in bytes of the memory reference, which
3242/// determines the scale.
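/// E.g. with Size == 4, an offset of 32 is encoded as an immediate of 8
/// (32 >> 2); valid offsets are non-negative multiples of Size below
/// 4096 * Size.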
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003243InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003244AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3245 unsigned Size) const {
3246 MachineRegisterInfo &MRI =
3247 Root.getParent()->getParent()->getParent()->getRegInfo();
3248
3249 if (!Root.isReg())
3250 return None;
3251
3252 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3253 if (!RootDef)
3254 return None;
3255
3256 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3257 return {{
3258 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3259 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3260 }};
3261 }
3262
3263 if (isBaseWithConstantOffset(Root, MRI)) {
3264 MachineOperand &LHS = RootDef->getOperand(1);
3265 MachineOperand &RHS = RootDef->getOperand(2);
3266 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3267 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3268 if (LHSDef && RHSDef) {
3269 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3270 unsigned Scale = Log2_32(Size);
3271 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3272 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00003273 return {{
3274 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3275 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3276 }};
3277
Daniel Sandersea8711b2017-10-16 03:36:29 +00003278 return {{
3279 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3280 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3281 }};
3282 }
3283 }
3284 }
3285
3286 // Before falling back to our general case, check if the unscaled
3287 // instructions can handle this. If so, that's preferable.
3288 if (selectAddrModeUnscaled(Root, Size).hasValue())
3289 return None;
3290
3291 return {{
3292 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3293 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3294 }};
3295}
3296
Volkan Kelesf7f25682018-01-16 18:44:05 +00003297void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3298 const MachineInstr &MI) const {
3299 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3300 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3301 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3302 assert(CstVal && "Expected constant value");
3303 MIB.addImm(CstVal.getValue());
3304}
3305
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003306namespace llvm {
3307InstructionSelector *
3308createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3309 AArch64Subtarget &Subtarget,
3310 AArch64RegisterBankInfo &RBI) {
3311 return new AArch64InstructionSelector(TM, Subtarget, RBI);
3312}
3313}