//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check if a reg def is an MI with a given opcode, and
  // return it if so.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  bool tryOptCMN(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank and a size in bits, return the smallest register
/// class that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, and as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

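// Note on the copy selection below: a COPY between two banks, or between
// registers of different sizes, cannot always stay a plain COPY. selectCopy
// handles the mismatches by inserting a subregister copy (when crossing banks
// into a smaller register) or a SUBREG_TO_REG (e.g. FPR16 -> GPR32), and
// otherwise simply constrains the destination to the class computed by
// getRegClassesForCopy above.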
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

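// Map a scalar G_SITOFP/G_UITOFP/G_FPTOSI/G_FPTOUI to the corresponding fixed
// AArch64 conversion instruction, keyed on the source and destination sizes
// (32 or 64 bits each); e.g. a signed 32-bit integer to 64-bit FP conversion
// selects SCVTFUWDri. Unsupported combinations return the generic opcode
// unchanged so the caller can report failure.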
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

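// Pick the conditional-select instruction for a G_SELECT: CSELWr/CSELXr for
// results on the GPR bank, FCSELSrrr/FCSELDrrr for the FPR bank, based on a
// 32-bit, 64-bit, or pointer result type. Returns 0 for anything else.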
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

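// Unlike the integer case, some FP predicates cannot be expressed as a single
// AArch64 condition code. For those, a second condition code is returned in
// CondCode2 and the two are intended to be combined disjunctively by the
// caller; e.g. FCMP_UEQ (unordered or equal) becomes EQ with VS as the second
// condition.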
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

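// Try to fold a G_BRCOND whose condition is a G_ICMP (possibly behind a
// G_TRUNC) against zero into a single CBZ/CBNZ compare-and-branch. Only
// equality/inequality compares of GPR-bank values are handled; anything else
// falls back to the generic G_BRCOND selection.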
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

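// Select a vector G_SHL. The shift amount operand is itself a vector
// register, so this maps onto USHL, the per-lane shift-by-register
// instruction. Only v2s32 and v4s32 are handled for now.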
bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction; instead, the
  // shift-left-by-register instructions take a signed per-lane shift amount,
  // where negative values specify a right shift.
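  // For example, a per-lane "ashr x, y" is emitted below as a NEG of the
  // shift amounts followed by "sshl x, neg(y)".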

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

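// On Darwin the va_list is a single pointer: compute the address of the first
// variadic stack slot (the function's VarArgsStackIndex frame object) and
// store it into the given va_list object.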
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

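// Materialize a full 64-bit address for the large code model: a MOVZ of the
// low 16 bits (MO_G0) followed by MOVKs that patch in bits 16-31, 32-47 and
// 48-63 (MO_G1..MO_G3), with the final MOVK writing the instruction's
// original destination register.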
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const Register CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

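  // G_CONSTANT / G_FCONSTANT: integer constants become MOVi32imm/MOVi64imm
  // pseudos. FP constants are first tried as a single FMOV; if the value has
  // no FMOV encoding, it is built in a GPR with the same MOV pseudos and then
  // copied over to the FPR bank.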
Tim Northover4494d692016-10-18 19:47:57 +00001205 case TargetOpcode::G_FCONSTANT:
Tim Northover4edc60d2016-10-10 21:49:42 +00001206 case TargetOpcode::G_CONSTANT: {
Tim Northover4494d692016-10-18 19:47:57 +00001207 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1208
Amara Emerson8f25a022019-06-21 16:43:50 +00001209 const LLT s8 = LLT::scalar(8);
1210 const LLT s16 = LLT::scalar(16);
Tim Northover4494d692016-10-18 19:47:57 +00001211 const LLT s32 = LLT::scalar(32);
1212 const LLT s64 = LLT::scalar(64);
1213 const LLT p0 = LLT::pointer(0, 64);
1214
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001215 const Register DefReg = I.getOperand(0).getReg();
Tim Northover4494d692016-10-18 19:47:57 +00001216 const LLT DefTy = MRI.getType(DefReg);
1217 const unsigned DefSize = DefTy.getSizeInBits();
1218 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1219
1220 // FIXME: Redundant check, but even less readable when factored out.
1221 if (isFP) {
1222 if (Ty != s32 && Ty != s64) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001223 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1224 << " constant, expected: " << s32 << " or " << s64
1225 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001226 return false;
1227 }
1228
1229 if (RB.getID() != AArch64::FPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001230 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1231 << " constant on bank: " << RB
1232 << ", expected: FPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001233 return false;
1234 }
Daniel Sanders11300ce2017-10-13 21:28:03 +00001235
1236 // The case when we have 0.0 is covered by tablegen. Reject it here so we
1237 // can be sure tablegen works correctly and isn't rescued by this code.
1238 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1239 return false;
Tim Northover4494d692016-10-18 19:47:57 +00001240 } else {
Daniel Sanders05540042017-08-08 10:44:31 +00001241 // s32 and s64 are covered by tablegen.
Amara Emerson8f25a022019-06-21 16:43:50 +00001242 if (Ty != p0 && Ty != s8 && Ty != s16) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001243 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1244 << " constant, expected: " << s32 << ", " << s64
1245 << ", or " << p0 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001246 return false;
1247 }
1248
1249 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001250 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1251 << " constant on bank: " << RB
1252 << ", expected: GPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001253 return false;
1254 }
1255 }
1256
Amara Emerson8f25a022019-06-21 16:43:50 +00001257 // We allow G_CONSTANT of types < 32b.
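    // s8 and s16 constants are materialized with MOVi32imm as well; the bits
    // above the value's width are don't-care.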
Tim Northover4494d692016-10-18 19:47:57 +00001258 const unsigned MovOpc =
Amara Emerson8f25a022019-06-21 16:43:50 +00001259 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
Tim Northover4494d692016-10-18 19:47:57 +00001260
Tim Northover4494d692016-10-18 19:47:57 +00001261 if (isFP) {
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001262      // Either emit an FMOV, or emit a GPR mov followed by a copy into the FPR.
Tim Northover4494d692016-10-18 19:47:57 +00001263 const TargetRegisterClass &GPRRC =
1264 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1265 const TargetRegisterClass &FPRRC =
1266 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1267
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001268      // Can we use an FMOV instruction to represent the immediate?
1269 if (emitFMovForFConstant(I, MRI))
1270 return true;
1271
1272 // Nope. Emit a copy and use a normal mov instead.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001273 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
Tim Northover4494d692016-10-18 19:47:57 +00001274 MachineOperand &RegOp = I.getOperand(0);
1275 RegOp.setReg(DefGPRReg);
Amara Emerson3739a202019-03-15 21:59:50 +00001276 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1277 MIB.buildCopy({DefReg}, {DefGPRReg});
Tim Northover4494d692016-10-18 19:47:57 +00001278
1279 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001280 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
Tim Northover4494d692016-10-18 19:47:57 +00001281 return false;
1282 }
1283
1284 MachineOperand &ImmOp = I.getOperand(1);
1285 // FIXME: Is going through int64_t always correct?
1286 ImmOp.ChangeToImmediate(
1287 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001288 } else if (I.getOperand(1).isCImm()) {
Tim Northover9267ac52016-12-05 21:47:07 +00001289 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1290 I.getOperand(1).ChangeToImmediate(Val);
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001291 } else if (I.getOperand(1).isImm()) {
1292 uint64_t Val = I.getOperand(1).getImm();
1293 I.getOperand(1).ChangeToImmediate(Val);
Tim Northover4494d692016-10-18 19:47:57 +00001294 }
1295
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001296 I.setDesc(TII.get(MovOpc));
Tim Northover4494d692016-10-18 19:47:57 +00001297 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1298 return true;
Tim Northover4edc60d2016-10-10 21:49:42 +00001299 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001300 case TargetOpcode::G_EXTRACT: {
1301 LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001302 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Amara Emerson242efdb2018-02-18 17:28:34 +00001303 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001304 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001305 // Larger extracts are vectors, same-size extracts should be something else
1306 // by now (either split up or simplified to a COPY).
1307 if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1308 return false;
1309
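    // The extract becomes an unsigned bitfield move: operand 2 already holds
    // the lsb, and the immediate appended below is the msb (lsb + width - 1).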
Amara Emersonbc03bae2018-02-18 17:03:02 +00001310 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001311 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1312 Ty.getSizeInBits() - 1);
1313
Amara Emersonbc03bae2018-02-18 17:03:02 +00001314 if (SrcSize < 64) {
1315 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1316 "unexpected G_EXTRACT types");
1317 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1318 }
1319
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001320 Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001321 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001322 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1323 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001324 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1325 AArch64::GPR32RegClass, MRI);
1326 I.getOperand(0).setReg(DstReg);
1327
1328 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1329 }
1330
1331 case TargetOpcode::G_INSERT: {
1332 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001333 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1334 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001335 // Larger inserts are vectors, same-size ones should be something else by
1336 // now (split up or turned into COPYs).
1337 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1338 return false;
1339
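    // The insert becomes a bitfield move (the BFI alias of BFM): immr is
    // rewritten to (regsize - lsb) % regsize and the appended imms is
    // width - 1.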
Amara Emersonbc03bae2018-02-18 17:03:02 +00001340 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001341 unsigned LSB = I.getOperand(3).getImm();
1342 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001343 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001344 MachineInstrBuilder(MF, I).addImm(Width - 1);
1345
Amara Emersonbc03bae2018-02-18 17:03:02 +00001346 if (DstSize < 64) {
1347 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1348 "unexpected G_INSERT types");
1349 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1350 }
1351
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001352 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001353 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1354 TII.get(AArch64::SUBREG_TO_REG))
1355 .addDef(SrcReg)
1356 .addImm(0)
1357 .addUse(I.getOperand(2).getReg())
1358 .addImm(AArch64::sub_32);
1359 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1360 AArch64::GPR32RegClass, MRI);
1361 I.getOperand(2).setReg(SrcReg);
1362
1363 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1364 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001365 case TargetOpcode::G_FRAME_INDEX: {
1366 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001367 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001368 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1369 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001370 return false;
1371 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001372 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001373
1374 // MOs for a #0 shifted immediate.
1375 I.addOperand(MachineOperand::CreateImm(0));
1376 I.addOperand(MachineOperand::CreateImm(0));
1377
1378 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1379 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001380
1381 case TargetOpcode::G_GLOBAL_VALUE: {
1382 auto GV = I.getOperand(1).getGlobal();
1383 if (GV->isThreadLocal()) {
1384 // FIXME: we don't support TLS yet.
1385 return false;
1386 }
1387 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
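    // Depending on the classification, either load the address from the GOT,
    // build it with a MOVZ/MOVK sequence (large code model), a single ADR
    // (tiny code model), or the usual ADRP + ADD pair.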
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001388 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001389 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001390 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001391 } else if (TM.getCodeModel() == CodeModel::Large) {
1392 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001393 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001394 I.eraseFromParent();
1395 return true;
David Green9dd1d452018-08-22 11:31:39 +00001396 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1397 I.setDesc(TII.get(AArch64::ADR));
1398 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001399 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001400 I.setDesc(TII.get(AArch64::MOVaddr));
1401 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1402 MachineInstrBuilder MIB(MF, I);
1403 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1404 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1405 }
1406 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1407 }
1408
Amara Emersond3144a42019-06-06 07:58:37 +00001409 case TargetOpcode::G_ZEXTLOAD:
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001410 case TargetOpcode::G_LOAD:
1411 case TargetOpcode::G_STORE: {
Amara Emersond3144a42019-06-06 07:58:37 +00001412 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1413 MachineIRBuilder MIB(I);
1414
Tim Northover0f140c72016-09-09 11:46:34 +00001415 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001416
Tim Northover5ae83502016-09-15 09:20:34 +00001417 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001418 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1419 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001420 return false;
1421 }
1422
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001423 auto &MemOp = **I.memoperands_begin();
1424 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001425 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001426 return false;
1427 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001428 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001429
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001430 const Register PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001431#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001432 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001433 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001434 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1435 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001436 assert(MRI.getType(PtrReg).isPointer() &&
1437 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001438#endif
1439
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001440 const Register ValReg = I.getOperand(0).getReg();
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001441 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1442
1443 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001444 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001445 if (NewOpc == I.getOpcode())
1446 return false;
1447
1448 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001449
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001450 uint64_t Offset = 0;
1451 auto *PtrMI = MRI.getVRegDef(PtrReg);
1452
1453 // Try to fold a GEP into our unsigned immediate addressing mode.
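    // The unsigned-immediate forms take a 12-bit immediate scaled by the
    // access size, hence the alignment, sign, and range checks below.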
1454 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1455 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1456 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001457 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001458 const unsigned Scale = Log2_32(Size);
1459 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1460 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1461 I.getOperand(1).setReg(Ptr2Reg);
1462 PtrMI = MRI.getVRegDef(Ptr2Reg);
1463 Offset = Imm / Size;
1464 }
1465 }
1466 }
1467
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001468 // If we haven't folded anything into our addressing mode yet, try to fold
1469 // a frame index into the base+offset.
1470 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1471 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1472
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001473 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001474
1475 // If we're storing a 0, use WZR/XZR.
1476 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1477 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1478 if (I.getOpcode() == AArch64::STRWui)
1479 I.getOperand(0).setReg(AArch64::WZR);
1480 else if (I.getOpcode() == AArch64::STRXui)
1481 I.getOperand(0).setReg(AArch64::XZR);
1482 }
1483 }
1484
Amara Emersond3144a42019-06-06 07:58:37 +00001485 if (IsZExtLoad) {
1486      // Zextloads to 32 bits or less are handled by the importer; only i64
          // results need handling here.
1487 if (MRI.getType(ValReg).getSizeInBits() != 64)
1488 return false;
1489      // If we have a ZEXTLOAD, rewrite the load to produce a narrower 32-bit
1490      // register and then zero-extend it with SUBREG_TO_REG.
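      // A 32-bit load already zeroes bits [63:32] of the X register, so the
      // SUBREG_TO_REG needs no extra code to perform the zero-extension.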
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001491 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1492 Register DstReg = I.getOperand(0).getReg();
Amara Emersond3144a42019-06-06 07:58:37 +00001493 I.getOperand(0).setReg(LdReg);
1494
1495 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1496 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1497 .addImm(0)
1498 .addUse(LdReg)
1499 .addImm(AArch64::sub_32);
1500 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1501 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1502 MRI);
1503 }
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001504 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1505 }
1506
Tim Northover9dd78f82017-02-08 21:22:25 +00001507 case TargetOpcode::G_SMULH:
1508 case TargetOpcode::G_UMULH: {
1509 // Reject the various things we don't support yet.
1510 if (unsupportedBinOp(I, RBI, MRI, TRI))
1511 return false;
1512
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001513 const Register DefReg = I.getOperand(0).getReg();
Tim Northover9dd78f82017-02-08 21:22:25 +00001514 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1515
1516 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001517 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001518 return false;
1519 }
1520
1521 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001522 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1523 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001524 return false;
1525 }
1526
1527 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1528 : AArch64::UMULHrr;
1529 I.setDesc(TII.get(NewOpc));
1530
1531 // Now that we selected an opcode, we need to constrain the register
1532 // operands to use appropriate classes.
1533 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1534 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001535 case TargetOpcode::G_FADD:
1536 case TargetOpcode::G_FSUB:
1537 case TargetOpcode::G_FMUL:
1538 case TargetOpcode::G_FDIV:
1539
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001540 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001541 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1542 return selectVectorASHR(I, MRI);
1543 LLVM_FALLTHROUGH;
1544 case TargetOpcode::G_SHL:
1545 if (Opcode == TargetOpcode::G_SHL &&
1546 MRI.getType(I.getOperand(0).getReg()).isVector())
1547 return selectVectorSHL(I, MRI);
1548 LLVM_FALLTHROUGH;
1549 case TargetOpcode::G_OR:
1550 case TargetOpcode::G_LSHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001551 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001552 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001553 if (unsupportedBinOp(I, RBI, MRI, TRI))
1554 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001555
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001556 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001557
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001558 const Register DefReg = I.getOperand(0).getReg();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001559 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1560
1561 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1562 if (NewOpc == I.getOpcode())
1563 return false;
1564
1565 I.setDesc(TII.get(NewOpc));
1566    // FIXME: Should the type always be reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001567
1568 // Now that we selected an opcode, we need to constrain the register
1569 // operands to use appropriate classes.
1570 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1571 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001572
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001573 case TargetOpcode::G_UADDO: {
1574 // TODO: Support other types.
1575 unsigned OpSize = Ty.getSizeInBits();
1576 if (OpSize != 32 && OpSize != 64) {
1577 LLVM_DEBUG(
1578 dbgs()
1579 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1580 return false;
1581 }
1582
1583 // TODO: Support vectors.
1584 if (Ty.isVector()) {
1585 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1586 return false;
1587 }
1588
1589 // Add and set the set condition flag.
1590 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1591 MachineIRBuilder MIRBuilder(I);
1592 auto AddsMI = MIRBuilder.buildInstr(
1593 AddsOpc, {I.getOperand(0).getReg()},
1594 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1595 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1596
1597 // Now, put the overflow result in the register given by the first operand
1598 // to the G_UADDO. CSINC increments the result when the predicate is false,
1599 // so to get the increment when it's true, we need to use the inverse. In
1600 // this case, we want to increment when carry is set.
1601 auto CsetMI = MIRBuilder
1602 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001603 {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001604 .addImm(getInvertedCondCode(AArch64CC::HS));
1605 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1606 I.eraseFromParent();
1607 return true;
1608 }
1609
Tim Northover398c5f52017-02-14 20:56:29 +00001610 case TargetOpcode::G_PTR_MASK: {
1611 uint64_t Align = I.getOperand(2).getImm();
1612 if (Align >= 64 || Align == 0)
1613 return false;
1614
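    // Clearing the low Align bits is a single ANDXri, since the resulting mask
    // is a valid logical immediate.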
1615 uint64_t Mask = ~((1ULL << Align) - 1);
1616 I.setDesc(TII.get(AArch64::ANDXri));
1617 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1618
1619 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1620 }
Tim Northover037af52c2016-10-31 18:31:09 +00001621 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001622 case TargetOpcode::G_TRUNC: {
1623 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1624 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1625
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001626 const Register DstReg = I.getOperand(0).getReg();
1627 const Register SrcReg = I.getOperand(1).getReg();
Tim Northoverfb8d9892016-10-12 22:49:15 +00001628
1629 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1630 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1631
1632 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001633 LLVM_DEBUG(
1634 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001635 return false;
1636 }
1637
1638 if (DstRB.getID() == AArch64::GPRRegBankID) {
1639 const TargetRegisterClass *DstRC =
1640 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1641 if (!DstRC)
1642 return false;
1643
1644 const TargetRegisterClass *SrcRC =
1645 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1646 if (!SrcRC)
1647 return false;
1648
1649 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1650 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001651 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001652 return false;
1653 }
1654
1655 if (DstRC == SrcRC) {
1656 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001657 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1658 SrcTy == LLT::scalar(64)) {
1659 llvm_unreachable("TableGen can import this case");
1660 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001661 } else if (DstRC == &AArch64::GPR32RegClass &&
1662 SrcRC == &AArch64::GPR64RegClass) {
1663 I.getOperand(1).setSubReg(AArch64::sub_32);
1664 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001665 LLVM_DEBUG(
1666 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001667 return false;
1668 }
1669
1670 I.setDesc(TII.get(TargetOpcode::COPY));
1671 return true;
1672 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1673 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1674 I.setDesc(TII.get(AArch64::XTNv4i16));
1675 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1676 return true;
1677 }
1678 }
1679
1680 return false;
1681 }
1682
Tim Northover3d38b3a2016-10-11 20:50:21 +00001683 case TargetOpcode::G_ANYEXT: {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001684 const Register DstReg = I.getOperand(0).getReg();
1685 const Register SrcReg = I.getOperand(1).getReg();
Tim Northover3d38b3a2016-10-11 20:50:21 +00001686
Quentin Colombetcb629a82016-10-12 03:57:49 +00001687 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1688 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001689 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1690 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001691 return false;
1692 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001693
Quentin Colombetcb629a82016-10-12 03:57:49 +00001694 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1695 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001696 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1697 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001698 return false;
1699 }
1700
1701 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1702
1703 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001704 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001705 return false;
1706 }
1707
Quentin Colombetcb629a82016-10-12 03:57:49 +00001708 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001709 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1710 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001711 return false;
1712 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001713 // At this point G_ANYEXT is just like a plain COPY, but we need
1714    // to explicitly form the 64-bit value when one is needed.
1715 if (DstSize > 32) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001716 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
Quentin Colombetcb629a82016-10-12 03:57:49 +00001717 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1718 .addDef(ExtSrc)
1719 .addImm(0)
1720 .addUse(SrcReg)
1721 .addImm(AArch64::sub_32);
1722 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001723 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001724 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001725 }
1726
1727 case TargetOpcode::G_ZEXT:
1728 case TargetOpcode::G_SEXT: {
1729 unsigned Opcode = I.getOpcode();
1730 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1731 SrcTy = MRI.getType(I.getOperand(1).getReg());
1732 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001733 const Register DefReg = I.getOperand(0).getReg();
1734 const Register SrcReg = I.getOperand(1).getReg();
Tim Northover3d38b3a2016-10-11 20:50:21 +00001735 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1736
1737 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001738 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1739 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001740 return false;
1741 }
1742
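    // Both extensions are bitfield moves: [SU]BFM with immr = 0 and
    // imms = source width - 1 copies the low source bits and sign- or
    // zero-fills the rest.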
1743 MachineInstr *ExtI;
1744 if (DstTy == LLT::scalar(64)) {
1745 // FIXME: Can we avoid manually doing this?
1746 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001747 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1748 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001749 return false;
1750 }
1751
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001752 const Register SrcXReg =
Tim Northover3d38b3a2016-10-11 20:50:21 +00001753 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1754 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1755 .addDef(SrcXReg)
1756 .addImm(0)
1757 .addUse(SrcReg)
1758 .addImm(AArch64::sub_32);
1759
1760 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1761 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1762 .addDef(DefReg)
1763 .addUse(SrcXReg)
1764 .addImm(0)
1765 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001766 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001767 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1768 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1769 .addDef(DefReg)
1770 .addUse(SrcReg)
1771 .addImm(0)
1772 .addImm(SrcTy.getSizeInBits() - 1);
1773 } else {
1774 return false;
1775 }
1776
1777 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1778
1779 I.eraseFromParent();
1780 return true;
1781 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001782
Tim Northover69271c62016-10-12 22:49:11 +00001783 case TargetOpcode::G_SITOFP:
1784 case TargetOpcode::G_UITOFP:
1785 case TargetOpcode::G_FPTOSI:
1786 case TargetOpcode::G_FPTOUI: {
1787 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1788 SrcTy = MRI.getType(I.getOperand(1).getReg());
1789 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1790 if (NewOpc == Opcode)
1791 return false;
1792
1793 I.setDesc(TII.get(NewOpc));
1794 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1795
1796 return true;
1797 }
1798
1799
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001800 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001801 // The importer is currently unable to import pointer types since they
1802 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001803 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001804
Daniel Sandersedd07842017-08-17 09:26:14 +00001805 case TargetOpcode::G_BITCAST:
1806 // Imported SelectionDAG rules can handle every bitcast except those that
1807 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00001808 // but we might not run an optimizer that deletes them. The other exception
1809 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1810 // of them.
1811 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00001812
Tim Northover9ac0eba2016-11-08 00:45:29 +00001813 case TargetOpcode::G_SELECT: {
1814 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001815 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1816 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001817 return false;
1818 }
1819
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001820 const Register CondReg = I.getOperand(1).getReg();
1821 const Register TReg = I.getOperand(2).getReg();
1822 const Register FReg = I.getOperand(3).getReg();
Tim Northover9ac0eba2016-11-08 00:45:29 +00001823
Jessica Paquette910630c2019-05-03 22:37:46 +00001824 // If we have a floating-point result, then we should use a floating point
1825 // select instead of an integer select.
1826 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
1827 AArch64::GPRRegBankID);
Tim Northover9ac0eba2016-11-08 00:45:29 +00001828
Amara Emersonc37ff0d2019-06-05 23:46:16 +00001829 if (IsFP && tryOptSelect(I))
1830 return true;
Tim Northover9ac0eba2016-11-08 00:45:29 +00001831
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001832    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
Tim Northover9ac0eba2016-11-08 00:45:29 +00001833 MachineInstr &TstMI =
1834 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1835 .addDef(AArch64::WZR)
1836 .addUse(CondReg)
1837 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1838
1839 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1840 .addDef(I.getOperand(0).getReg())
1841 .addUse(TReg)
1842 .addUse(FReg)
1843 .addImm(AArch64CC::NE);
1844
1845 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1846 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1847
1848 I.eraseFromParent();
1849 return true;
1850 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001851 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00001852 if (Ty.isVector())
1853 return selectVectorICmp(I, MRI);
1854
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001855 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001856 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1857 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001858 return false;
1859 }
1860
1861 unsigned CmpOpc = 0;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001862 Register ZReg;
Tim Northover6c02ad52016-10-12 22:49:04 +00001863
Jessica Paquette49537bb2019-06-17 18:40:06 +00001864 // Check if this compare can be represented as a cmn, and perform any
1865 // necessary transformations to do so.
1866 if (tryOptCMN(I))
1867 return true;
1868
Tim Northover6c02ad52016-10-12 22:49:04 +00001869 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1870 if (CmpTy == LLT::scalar(32)) {
1871 CmpOpc = AArch64::SUBSWrr;
1872 ZReg = AArch64::WZR;
1873 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1874 CmpOpc = AArch64::SUBSXrr;
1875 ZReg = AArch64::XZR;
1876 } else {
1877 return false;
1878 }
1879
Amara Emerson0d209692019-06-09 07:31:25 +00001880 // Try to match immediate forms.
1881 auto ImmFns = selectArithImmed(I.getOperand(3));
1882 if (ImmFns)
1883 CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
1884
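    // The compare is a subtract whose result is discarded into the zero
    // register; only the NZCV flags are used.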
Amara Emerson0d209692019-06-09 07:31:25 +00001885 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1886 .addDef(ZReg)
1887 .addUse(I.getOperand(2).getReg());
1888
1889 // If we matched a valid constant immediate, add those operands.
1890 if (ImmFns) {
1891 for (auto &RenderFn : *ImmFns)
1892 RenderFn(CmpMI);
1893 } else {
1894 CmpMI.addUse(I.getOperand(3).getReg());
1895 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001896
Jessica Paquette49537bb2019-06-17 18:40:06 +00001897 MachineIRBuilder MIRBuilder(I);
1898 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
1899 MIRBuilder);
Amara Emerson0d209692019-06-09 07:31:25 +00001900 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover6c02ad52016-10-12 22:49:04 +00001901 I.eraseFromParent();
1902 return true;
1903 }
1904
Tim Northover7dd378d2016-10-12 22:49:07 +00001905 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001906 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001907 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1908 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001909 return false;
1910 }
1911
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001912 unsigned CmpOpc = selectFCMPOpc(I, MRI);
1913 if (!CmpOpc)
Tim Northover7dd378d2016-10-12 22:49:07 +00001914 return false;
Tim Northover7dd378d2016-10-12 22:49:07 +00001915
1916 // FIXME: regbank
1917
1918 AArch64CC::CondCode CC1, CC2;
1919 changeFCMPPredToAArch64CC(
1920 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
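    // Some FP predicates need two AArch64 condition codes (CC2 != AL). Each is
    // materialized with a CSINC below and the two results are ORed together.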
1921
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001922 // Partially build the compare. Decide if we need to add a use for the
1923 // third operand based off whether or not we're comparing against 0.0.
1924 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1925 .addUse(I.getOperand(2).getReg());
1926
1927 // If we don't have an immediate compare, then we need to add a use of the
1928 // register which wasn't used for the immediate.
1929 // Note that the immediate will always be the last operand.
1930 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
1931 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
Tim Northover7dd378d2016-10-12 22:49:07 +00001932
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001933 const Register DefReg = I.getOperand(0).getReg();
1934 Register Def1Reg = DefReg;
Tim Northover7dd378d2016-10-12 22:49:07 +00001935 if (CC2 != AArch64CC::AL)
1936 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1937
1938 MachineInstr &CSetMI =
1939 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1940 .addDef(Def1Reg)
1941 .addUse(AArch64::WZR)
1942 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001943 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001944
1945 if (CC2 != AArch64CC::AL) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001946 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Tim Northover7dd378d2016-10-12 22:49:07 +00001947 MachineInstr &CSet2MI =
1948 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1949 .addDef(Def2Reg)
1950 .addUse(AArch64::WZR)
1951 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001952 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001953 MachineInstr &OrMI =
1954 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1955 .addDef(DefReg)
1956 .addUse(Def1Reg)
1957 .addUse(Def2Reg);
1958 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1959 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1960 }
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001961 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover7dd378d2016-10-12 22:49:07 +00001962 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1963
1964 I.eraseFromParent();
1965 return true;
1966 }
Tim Northovere9600d82017-02-08 17:57:27 +00001967 case TargetOpcode::G_VASTART:
1968 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1969 : selectVaStartAAPCS(I, MF, MRI);
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00001970 case TargetOpcode::G_INTRINSIC:
1971 return selectIntrinsic(I, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001972 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001973 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001974 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001975 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001976 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001977 const Register DstReg = I.getOperand(0).getReg();
Amara Emerson58aea522018-02-02 01:44:43 +00001978 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1979 const TargetRegisterClass *DstRC =
1980 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1981 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001982 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001983 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001984 case TargetOpcode::G_BLOCK_ADDR: {
1985 if (TM.getCodeModel() == CodeModel::Large) {
1986 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1987 I.eraseFromParent();
1988 return true;
1989 } else {
1990 I.setDesc(TII.get(AArch64::MOVaddrBA));
1991 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1992 I.getOperand(0).getReg())
1993 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1994 /* Offset */ 0, AArch64II::MO_PAGE)
1995 .addBlockAddress(
1996 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1997 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1998 I.eraseFromParent();
1999 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2000 }
2001 }
Jessica Paquette991cb392019-04-23 20:46:19 +00002002 case TargetOpcode::G_INTRINSIC_TRUNC:
2003 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00002004 case TargetOpcode::G_INTRINSIC_ROUND:
2005 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00002006 case TargetOpcode::G_BUILD_VECTOR:
2007 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002008 case TargetOpcode::G_MERGE_VALUES:
2009 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002010 case TargetOpcode::G_UNMERGE_VALUES:
2011 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002012 case TargetOpcode::G_SHUFFLE_VECTOR:
2013 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00002014 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2015 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002016 case TargetOpcode::G_INSERT_VECTOR_ELT:
2017 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00002018 case TargetOpcode::G_CONCAT_VECTORS:
2019 return selectConcatVectors(I, MRI);
Amara Emerson6e71b342019-06-21 18:10:41 +00002020 case TargetOpcode::G_JUMP_TABLE:
2021 return selectJumpTable(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002022 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002023
2024 return false;
2025}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002026
Amara Emerson6e71b342019-06-21 18:10:41 +00002027bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2028 MachineRegisterInfo &MRI) const {
2029 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002030 Register JTAddr = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002031 unsigned JTI = I.getOperand(1).getIndex();
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002032 Register Index = I.getOperand(2).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002033 MachineIRBuilder MIB(I);
2034
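  // JumpTableDest32 is a pseudo that looks up the 32-bit jump-table entry for
  // Index and produces the branch target; we then branch to it indirectly.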
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002035 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2036 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
Amara Emerson6e71b342019-06-21 18:10:41 +00002037 MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2038 {JTAddr, Index})
2039 .addJumpTableIndex(JTI);
2040
2041 // Build the indirect branch.
2042 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2043 I.eraseFromParent();
2044 return true;
2045}
2046
2047bool AArch64InstructionSelector::selectJumpTable(
2048 MachineInstr &I, MachineRegisterInfo &MRI) const {
2049 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2050 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2051
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002052 Register DstReg = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002053 unsigned JTI = I.getOperand(1).getIndex();
2054 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2055 MachineIRBuilder MIB(I);
2056 auto MovMI =
2057 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2058 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2059 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2060 I.eraseFromParent();
2061 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2062}
2063
Jessica Paquette991cb392019-04-23 20:46:19 +00002064bool AArch64InstructionSelector::selectIntrinsicTrunc(
2065 MachineInstr &I, MachineRegisterInfo &MRI) const {
2066 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2067
2068 // Select the correct opcode.
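  // G_INTRINSIC_TRUNC is round-toward-zero, which maps directly to FRINTZ.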
2069 unsigned Opc = 0;
2070 if (!SrcTy.isVector()) {
2071 switch (SrcTy.getSizeInBits()) {
2072 default:
2073 case 16:
2074 Opc = AArch64::FRINTZHr;
2075 break;
2076 case 32:
2077 Opc = AArch64::FRINTZSr;
2078 break;
2079 case 64:
2080 Opc = AArch64::FRINTZDr;
2081 break;
2082 }
2083 } else {
2084 unsigned NumElts = SrcTy.getNumElements();
2085 switch (SrcTy.getElementType().getSizeInBits()) {
2086 default:
2087 break;
2088 case 16:
2089 if (NumElts == 4)
2090 Opc = AArch64::FRINTZv4f16;
2091 else if (NumElts == 8)
2092 Opc = AArch64::FRINTZv8f16;
2093 break;
2094 case 32:
2095 if (NumElts == 2)
2096 Opc = AArch64::FRINTZv2f32;
2097 else if (NumElts == 4)
2098 Opc = AArch64::FRINTZv4f32;
2099 break;
2100 case 64:
2101 if (NumElts == 2)
2102 Opc = AArch64::FRINTZv2f64;
2103 break;
2104 }
2105 }
2106
2107 if (!Opc) {
2108 // Didn't get an opcode above, bail.
2109 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2110 return false;
2111 }
2112
2113 // Legalization would have set us up perfectly for this; we just need to
2114 // set the opcode and move on.
2115 I.setDesc(TII.get(Opc));
2116 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2117}
2118
Jessica Paquette4fe75742019-04-23 23:03:03 +00002119bool AArch64InstructionSelector::selectIntrinsicRound(
2120 MachineInstr &I, MachineRegisterInfo &MRI) const {
2121 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2122
2123 // Select the correct opcode.
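  // G_INTRINSIC_ROUND rounds to nearest, ties away from zero, which is exactly
  // FRINTA.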
2124 unsigned Opc = 0;
2125 if (!SrcTy.isVector()) {
2126 switch (SrcTy.getSizeInBits()) {
2127 default:
2128 case 16:
2129 Opc = AArch64::FRINTAHr;
2130 break;
2131 case 32:
2132 Opc = AArch64::FRINTASr;
2133 break;
2134 case 64:
2135 Opc = AArch64::FRINTADr;
2136 break;
2137 }
2138 } else {
2139 unsigned NumElts = SrcTy.getNumElements();
2140 switch (SrcTy.getElementType().getSizeInBits()) {
2141 default:
2142 break;
2143 case 16:
2144 if (NumElts == 4)
2145 Opc = AArch64::FRINTAv4f16;
2146 else if (NumElts == 8)
2147 Opc = AArch64::FRINTAv8f16;
2148 break;
2149 case 32:
2150 if (NumElts == 2)
2151 Opc = AArch64::FRINTAv2f32;
2152 else if (NumElts == 4)
2153 Opc = AArch64::FRINTAv4f32;
2154 break;
2155 case 64:
2156 if (NumElts == 2)
2157 Opc = AArch64::FRINTAv2f64;
2158 break;
2159 }
2160 }
2161
2162 if (!Opc) {
2163 // Didn't get an opcode above, bail.
2164 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2165 return false;
2166 }
2167
2168 // Legalization would have set us up perfectly for this; we just need to
2169 // set the opcode and move on.
2170 I.setDesc(TII.get(Opc));
2171 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2172}
2173
Amara Emerson9bf092d2019-04-09 21:22:43 +00002174bool AArch64InstructionSelector::selectVectorICmp(
2175 MachineInstr &I, MachineRegisterInfo &MRI) const {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002176 Register DstReg = I.getOperand(0).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002177 LLT DstTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002178 Register SrcReg = I.getOperand(2).getReg();
2179 Register Src2Reg = I.getOperand(3).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002180 LLT SrcTy = MRI.getType(SrcReg);
2181
2182 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2183 unsigned NumElts = DstTy.getNumElements();
2184
2185 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2186 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2187 // Third index is cc opcode:
2188 // 0 == eq
2189 // 1 == ugt
2190 // 2 == uge
2191 // 3 == ult
2192 // 4 == ule
2193 // 5 == sgt
2194 // 6 == sge
2195 // 7 == slt
2196 // 8 == sle
2197 // ne is done by negating 'eq' result.
2198
2199  // The table below assumes that for some comparisons the operands will be
2200 // commuted.
2201 // ult op == commute + ugt op
2202 // ule op == commute + uge op
2203 // slt op == commute + sgt op
2204 // sle op == commute + sge op
2205 unsigned PredIdx = 0;
2206 bool SwapOperands = false;
2207 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2208 switch (Pred) {
2209 case CmpInst::ICMP_NE:
2210 case CmpInst::ICMP_EQ:
2211 PredIdx = 0;
2212 break;
2213 case CmpInst::ICMP_UGT:
2214 PredIdx = 1;
2215 break;
2216 case CmpInst::ICMP_UGE:
2217 PredIdx = 2;
2218 break;
2219 case CmpInst::ICMP_ULT:
2220 PredIdx = 3;
2221 SwapOperands = true;
2222 break;
2223 case CmpInst::ICMP_ULE:
2224 PredIdx = 4;
2225 SwapOperands = true;
2226 break;
2227 case CmpInst::ICMP_SGT:
2228 PredIdx = 5;
2229 break;
2230 case CmpInst::ICMP_SGE:
2231 PredIdx = 6;
2232 break;
2233 case CmpInst::ICMP_SLT:
2234 PredIdx = 7;
2235 SwapOperands = true;
2236 break;
2237 case CmpInst::ICMP_SLE:
2238 PredIdx = 8;
2239 SwapOperands = true;
2240 break;
2241 default:
2242 llvm_unreachable("Unhandled icmp predicate");
2243 return false;
2244 }
2245
2246 // This table obviously should be tablegen'd when we have our GISel native
2247 // tablegen selector.
2248
2249 static const unsigned OpcTable[4][4][9] = {
2250 {
2251 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2252 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2253 0 /* invalid */},
2254 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2255 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2256 0 /* invalid */},
2257 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2258 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2259 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2260 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2261 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2262 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2263 },
2264 {
2265 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2266 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2267 0 /* invalid */},
2268 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2269 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2270 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2271 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2272 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2273 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2274 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2275 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2276 0 /* invalid */}
2277 },
2278 {
2279 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2280 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2281 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2282 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2283 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2284 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2285 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2286 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2287 0 /* invalid */},
2288 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2289 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2290 0 /* invalid */}
2291 },
2292 {
2293 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2294 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2295 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2296 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2297 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2298 0 /* invalid */},
2299 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2300 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2301 0 /* invalid */},
2302 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2303 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2304 0 /* invalid */}
2305 },
2306 };
2307 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2308 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2309 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2310 if (!Opc) {
2311 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2312 return false;
2313 }
2314
2315 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2316 const TargetRegisterClass *SrcRC =
2317 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2318 if (!SrcRC) {
2319 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2320 return false;
2321 }
2322
2323 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2324 if (SrcTy.getSizeInBits() == 128)
2325 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2326
2327 if (SwapOperands)
2328 std::swap(SrcReg, Src2Reg);
2329
2330 MachineIRBuilder MIB(I);
2331 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2332 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2333
2334 // Invert if we had a 'ne' cc.
2335 if (NotOpc) {
2336 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2337 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2338 } else {
2339 MIB.buildCopy(DstReg, Cmp.getReg(0));
2340 }
2341 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2342 I.eraseFromParent();
2343 return true;
2344}
2345
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002346MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002347 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002348 MachineIRBuilder &MIRBuilder) const {
2349 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002350
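  // The pattern is an IMPLICIT_DEF of the full vector class followed by an
  // INSERT_SUBREG that drops the scalar into the low lane via the h/s/d
  // subregister.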
2351 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002352 auto Ins =
2353 MIRBuilder
2354 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2355 .addImm(SubregIndex);
2356 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2357 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2358 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002359 };
2360
Amara Emerson8acb0d92019-03-04 19:16:00 +00002361 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002362 case 16:
2363 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002364 case 32:
2365 return BuildFn(AArch64::ssub);
2366 case 64:
2367 return BuildFn(AArch64::dsub);
2368 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002369 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002370 }
2371}
2372
Amara Emerson8cb186c2018-12-20 01:11:04 +00002373bool AArch64InstructionSelector::selectMergeValues(
2374 MachineInstr &I, MachineRegisterInfo &MRI) const {
2375 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2376 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2377 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2378 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2379
2380 // At the moment we only support merging two s32s into an s64.
2381 if (I.getNumOperands() != 3)
2382 return false;
2383 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2384 return false;
2385 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2386 if (RB.getID() != AArch64::GPRRegBankID)
2387 return false;
2388
2389 auto *DstRC = &AArch64::GPR64RegClass;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002390 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002391 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2392 TII.get(TargetOpcode::SUBREG_TO_REG))
2393 .addDef(SubToRegDef)
2394 .addImm(0)
2395 .addUse(I.getOperand(1).getReg())
2396 .addImm(AArch64::sub_32);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002397 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002398 // Need to anyext the second scalar before we can use bfm
2399 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2400 TII.get(TargetOpcode::SUBREG_TO_REG))
2401 .addDef(SubToRegDef2)
2402 .addImm(0)
2403 .addUse(I.getOperand(2).getReg())
2404 .addImm(AArch64::sub_32);
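  // With immr = 32 and imms = 31, BFMXri acts like BFI #32, #32: it inserts
  // the low 32 bits of the second value into bits [63:32] of the first.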
Amara Emerson8cb186c2018-12-20 01:11:04 +00002405 MachineInstr &BFM =
2406 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002407 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002408 .addUse(SubToRegDef)
2409 .addUse(SubToRegDef2)
2410 .addImm(32)
2411 .addImm(31);
2412 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2413 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2414 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2415 I.eraseFromParent();
2416 return true;
2417}
2418
Jessica Paquette607774c2019-03-11 22:18:01 +00002419static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2420 const unsigned EltSize) {
2421 // Choose a lane copy opcode and subregister based off of the size of the
2422 // vector's elements.
2423 switch (EltSize) {
2424 case 16:
2425 CopyOpc = AArch64::CPYi16;
2426 ExtractSubReg = AArch64::hsub;
2427 break;
2428 case 32:
2429 CopyOpc = AArch64::CPYi32;
2430 ExtractSubReg = AArch64::ssub;
2431 break;
2432 case 64:
2433 CopyOpc = AArch64::CPYi64;
2434 ExtractSubReg = AArch64::dsub;
2435 break;
2436 default:
2437 // Unknown size, bail out.
2438 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2439 return false;
2440 }
2441 return true;
2442}
2443
Amara Emersond61b89b2019-03-14 22:48:18 +00002444MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002445 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2446 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
Amara Emersond61b89b2019-03-14 22:48:18 +00002447 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2448 unsigned CopyOpc = 0;
2449 unsigned ExtractSubReg = 0;
2450 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2451 LLVM_DEBUG(
2452 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2453 return nullptr;
2454 }
2455
2456 const TargetRegisterClass *DstRC =
2457 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2458 if (!DstRC) {
2459 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2460 return nullptr;
2461 }
2462
2463 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2464 const LLT &VecTy = MRI.getType(VecReg);
2465 const TargetRegisterClass *VecRC =
2466 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2467 if (!VecRC) {
2468 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2469 return nullptr;
2470 }
2471
2472 // The register that we're going to copy into.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002473 Register InsertReg = VecReg;
Amara Emersond61b89b2019-03-14 22:48:18 +00002474 if (!DstReg)
2475 DstReg = MRI.createVirtualRegister(DstRC);
2476 // If the lane index is 0, we just use a subregister COPY.
2477 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002478 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2479 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002480 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002481 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002482 }
2483
2484 // Lane copies require 128-bit wide registers. If we're dealing with an
2485 // unpacked vector, then we need to move up to that width. Insert an implicit
2486 // def and a subregister insert to get us there.
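  // Roughly, this widening uses the same IMPLICIT_DEF + INSERT_SUBREG pattern
  // as the unpacked unmerge case below:
  //   %imp:fpr128  = IMPLICIT_DEF
  //   %wide:fpr128 = INSERT_SUBREG %imp, %VecReg, dsub
  // and the lane copy then reads from the widened register.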
2487 if (VecTy.getSizeInBits() != 128) {
2488 MachineInstr *ScalarToVector = emitScalarToVector(
2489 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2490 if (!ScalarToVector)
2491 return nullptr;
2492 InsertReg = ScalarToVector->getOperand(0).getReg();
2493 }
2494
2495 MachineInstr *LaneCopyMI =
2496 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2497 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2498
2499 // Make sure that we actually constrain the initial copy.
2500 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2501 return LaneCopyMI;
2502}
2503
Jessica Paquette607774c2019-03-11 22:18:01 +00002504bool AArch64InstructionSelector::selectExtractElt(
2505 MachineInstr &I, MachineRegisterInfo &MRI) const {
2506 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2507 "unexpected opcode!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002508 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002509 const LLT NarrowTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002510 const Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002511 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002512 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002513 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2514 "source register size too small!");
2515 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2516
2517 // Need the lane index to determine the correct copy opcode.
2518 MachineOperand &LaneIdxOp = I.getOperand(2);
2519 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2520
2521 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2522 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2523 return false;
2524 }
2525
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002526 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002527 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2528 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002529 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002530 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002531
Jessica Paquette607774c2019-03-11 22:18:01 +00002532 MachineIRBuilder MIRBuilder(I);
2533
Amara Emersond61b89b2019-03-14 22:48:18 +00002534 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2535 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2536 LaneIdx, MIRBuilder);
2537 if (!Extract)
2538 return false;
2539
2540 I.eraseFromParent();
2541 return true;
2542}
2543
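/// Select a G_UNMERGE_VALUES whose destinations are themselves vectors (a
/// vector split), implemented by extracting each destination sub-vector from
/// the wide source as if it were a scalar lane.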
2544bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2545 MachineInstr &I, MachineRegisterInfo &MRI) const {
2546 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002547 Register SrcReg = I.getOperand(NumElts).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002548 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2549 const LLT SrcTy = MRI.getType(SrcReg);
2550
2551 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2552 if (SrcTy.getSizeInBits() > 128) {
2553 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2554 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002555 }
2556
Amara Emersond61b89b2019-03-14 22:48:18 +00002557 MachineIRBuilder MIB(I);
2558
2559 // We implement a split vector operation by treating the sub-vectors as
2560 // scalars and extracting them.
2561 const RegisterBank &DstRB =
2562 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2563 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002564 Register Dst = I.getOperand(OpIdx).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002565 MachineInstr *Extract =
2566 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2567 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002568 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002569 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002570 I.eraseFromParent();
2571 return true;
2572}
2573
Jessica Paquette245047d2019-01-24 22:00:41 +00002574bool AArch64InstructionSelector::selectUnmergeValues(
2575 MachineInstr &I, MachineRegisterInfo &MRI) const {
2576 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2577 "unexpected opcode");
2578
2579 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2580 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2581 AArch64::FPRRegBankID ||
2582 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2583 AArch64::FPRRegBankID) {
2584 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2585 "currently unsupported.\n");
2586 return false;
2587 }
2588
2589 // The last operand is the vector source register, and every other operand is
2590 // a register to unpack into.
2591 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002592 Register SrcReg = I.getOperand(NumElts).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002593 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2594 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002595 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002596 assert(WideTy.isVector() && "can only unmerge from vector types!");
2597 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2598 "source register size too small!");
2599
Amara Emersond61b89b2019-03-14 22:48:18 +00002600 if (!NarrowTy.isScalar())
2601 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002602
Amara Emerson3739a202019-03-15 21:59:50 +00002603 MachineIRBuilder MIB(I);
2604
Jessica Paquette245047d2019-01-24 22:00:41 +00002605 // Choose a lane copy opcode and subregister based off of the size of the
2606 // vector's elements.
2607 unsigned CopyOpc = 0;
2608 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002609 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002610 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002611
2612 // Set up for the lane copies.
2613 MachineBasicBlock &MBB = *I.getParent();
2614
2615 // Stores the registers we'll be copying from.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002616 SmallVector<Register, 4> InsertRegs;
Jessica Paquette245047d2019-01-24 22:00:41 +00002617
2618 // We'll use the first register twice, so we only need NumElts-1 registers.
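  // (InsertRegs[0] feeds both the lane 0 subregister copy and the lane 1
  // CPYi copy further down, which is why one source register is shared.)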
2619 unsigned NumInsertRegs = NumElts - 1;
2620
2621 // If our elements fit into exactly 128 bits, then we can copy from the source
2622 // directly. Otherwise, we need to do a bit of setup with some subregister
2623 // inserts.
2624 if (NarrowTy.getSizeInBits() * NumElts == 128) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002625 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00002626 } else {
2627 // No. We have to perform subregister inserts. For each insert, create an
2628 // implicit def and a subregister insert, and save the register we create.
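    // Each iteration emits roughly:
    //   %ImpDefReg:fpr128 = IMPLICIT_DEF
    //   %InsertReg:fpr128 = INSERT_SUBREG %ImpDefReg, %SrcReg, dsub
    // so every lane copy below reads from a full 128-bit register.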
2629 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002630 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00002631 MachineInstr &ImpDefMI =
2632 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2633 ImpDefReg);
2634
2635 // Now, create the subregister insert from SrcReg.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002636 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00002637 MachineInstr &InsMI =
2638 *BuildMI(MBB, I, I.getDebugLoc(),
2639 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2640 .addUse(ImpDefReg)
2641 .addUse(SrcReg)
2642 .addImm(AArch64::dsub);
2643
2644 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2645 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2646
2647 // Save the register so that we can copy from it after.
2648 InsertRegs.push_back(InsertReg);
2649 }
2650 }
2651
2652 // Now that we've created any necessary subregister inserts, we can
2653 // create the copies.
2654 //
2655 // Perform the first copy separately as a subregister copy.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002656 Register CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002657 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2658 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002659 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002660
2661 // Now, perform the remaining copies as vector lane copies.
2662 unsigned LaneIdx = 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002663 for (Register InsReg : InsertRegs) {
2664 Register CopyTo = I.getOperand(LaneIdx).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002665 MachineInstr &CopyInst =
2666 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2667 .addUse(InsReg)
2668 .addImm(LaneIdx);
2669 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2670 ++LaneIdx;
2671 }
2672
2673 // Separately constrain the first copy's destination. Because of the
2674 // limitation in constrainOperandRegClass, we can't guarantee that this will
2675 // actually be constrained. So, do it ourselves using the second operand.
2676 const TargetRegisterClass *RC =
2677 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2678 if (!RC) {
2679 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2680 return false;
2681 }
2682
2683 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2684 I.eraseFromParent();
2685 return true;
2686}
2687
Amara Emerson2ff22982019-03-14 22:48:15 +00002688bool AArch64InstructionSelector::selectConcatVectors(
2689 MachineInstr &I, MachineRegisterInfo &MRI) const {
2690 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2691 "Unexpected opcode");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002692 Register Dst = I.getOperand(0).getReg();
2693 Register Op1 = I.getOperand(1).getReg();
2694 Register Op2 = I.getOperand(2).getReg();
Amara Emerson2ff22982019-03-14 22:48:15 +00002695 MachineIRBuilder MIRBuilder(I);
2696 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2697 if (!ConcatMI)
2698 return false;
2699 I.eraseFromParent();
2700 return true;
2701}
2702
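/// Walk the G_BUILD_VECTOR that feeds a G_SHUFFLE_VECTOR's mask operand and
/// collect each lane's constant index into \p Idxs, using None for lanes
/// whose index is a G_IMPLICIT_DEF.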
Amara Emerson1abe05c2019-02-21 20:20:16 +00002703void AArch64InstructionSelector::collectShuffleMaskIndices(
2704 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00002705 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00002706 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2707 assert(
2708 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2709 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2710 // Find the constant indices.
2711 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2712 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2713 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2714 // Look through copies.
2715 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2716 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2717 assert(ScalarDef && "Could not find def of copy operand");
2718 }
Amara Emerson2806fd02019-04-12 21:31:21 +00002719 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2720      // This must be an undef if it is not a constant.
2721 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2722 Idxs.push_back(None);
2723 } else {
2724 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2725 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002726 }
2727}
2728
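/// Create (or reuse) a constant pool entry for \p CPVal and return its index.
/// Falls back to the type's alloc size for the alignment when no preferred
/// alignment is known.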
2729unsigned
2730AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2731 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00002732 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00002733 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2734 if (Align == 0)
2735 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2736
2737 MachineConstantPool *MCP = MF.getConstantPool();
2738 return MCP->getConstantPoolIndex(CPVal, Align);
2739}
2740
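/// Emit an ADRP + LDR pair that materializes \p CPVal from the constant pool
/// and return the load instruction, or nullptr if the value's store size is
/// not 8 or 16 bytes.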
2741MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2742 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2743 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2744
2745 auto Adrp =
2746 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2747 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002748
2749 MachineInstr *LoadMI = nullptr;
2750 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2751 case 16:
2752 LoadMI =
2753 &*MIRBuilder
2754 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2755 .addConstantPoolIndex(CPIdx, 0,
2756 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2757 break;
2758 case 8:
2759 LoadMI = &*MIRBuilder
2760 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2761 .addConstantPoolIndex(
2762 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2763 break;
2764 default:
2765 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2766 << *CPVal->getType());
2767 return nullptr;
2768 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002769 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002770 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2771 return LoadMI;
2772}
2773
2774/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2775/// size and RB.
2776static std::pair<unsigned, unsigned>
2777getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2778 unsigned Opc, SubregIdx;
2779 if (RB.getID() == AArch64::GPRRegBankID) {
2780 if (EltSize == 32) {
2781 Opc = AArch64::INSvi32gpr;
2782 SubregIdx = AArch64::ssub;
2783 } else if (EltSize == 64) {
2784 Opc = AArch64::INSvi64gpr;
2785 SubregIdx = AArch64::dsub;
2786 } else {
2787 llvm_unreachable("invalid elt size!");
2788 }
2789 } else {
2790 if (EltSize == 8) {
2791 Opc = AArch64::INSvi8lane;
2792 SubregIdx = AArch64::bsub;
2793 } else if (EltSize == 16) {
2794 Opc = AArch64::INSvi16lane;
2795 SubregIdx = AArch64::hsub;
2796 } else if (EltSize == 32) {
2797 Opc = AArch64::INSvi32lane;
2798 SubregIdx = AArch64::ssub;
2799 } else if (EltSize == 64) {
2800 Opc = AArch64::INSvi64lane;
2801 SubregIdx = AArch64::dsub;
2802 } else {
2803 llvm_unreachable("invalid elt size!");
2804 }
2805 }
2806 return std::make_pair(Opc, SubregIdx);
2807}
2808
2809MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002810 Optional<Register> Dst, Register Op1, Register Op2,
Amara Emerson2ff22982019-03-14 22:48:15 +00002811 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002812 // We implement a vector concat by:
2813 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2814 // 2. Insert the upper vector into the destination's upper element
2815 // TODO: some of this code is common with G_BUILD_VECTOR handling.
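  // Concretely, Op1 and Op2 are each widened to an FPR128 value, and the low
  // 64-bit element of the widened Op2 is inserted into lane 1 of the widened
  // Op1 (INSvi64lane), leaving Op1 in the low half and Op2 in the high half.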
2816 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2817
2818 const LLT Op1Ty = MRI.getType(Op1);
2819 const LLT Op2Ty = MRI.getType(Op2);
2820
2821 if (Op1Ty != Op2Ty) {
2822 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2823 return nullptr;
2824 }
2825 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2826
2827 if (Op1Ty.getSizeInBits() >= 128) {
2828 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2829 return nullptr;
2830 }
2831
2832 // At the moment we just support 64 bit vector concats.
2833 if (Op1Ty.getSizeInBits() != 64) {
2834    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2835 return nullptr;
2836 }
2837
2838 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2839 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2840 const TargetRegisterClass *DstRC =
2841 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2842
2843 MachineInstr *WidenedOp1 =
2844 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2845 MachineInstr *WidenedOp2 =
2846 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2847 if (!WidenedOp1 || !WidenedOp2) {
2848 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2849 return nullptr;
2850 }
2851
2852 // Now do the insert of the upper element.
2853 unsigned InsertOpc, InsSubRegIdx;
2854 std::tie(InsertOpc, InsSubRegIdx) =
2855 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2856
Amara Emerson2ff22982019-03-14 22:48:15 +00002857 if (!Dst)
2858 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002859 auto InsElt =
2860 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002861 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002862 .addImm(1) /* Lane index */
2863 .addUse(WidenedOp2->getOperand(0).getReg())
2864 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002865 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2866 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002867}
2868
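/// Try to materialize a G_FCONSTANT with a single FMOVSi/FMOVDi, which is only
/// possible when the value has an 8-bit encodable immediate form. Returns the
/// rewritten instruction, or nullptr if the constant cannot use FMOV.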
Jessica Paquettea3843fe2019-05-01 22:39:43 +00002869MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
2870 MachineInstr &I, MachineRegisterInfo &MRI) const {
2871 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
2872 "Expected a G_FCONSTANT!");
2873 MachineOperand &ImmOp = I.getOperand(1);
2874 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
2875
2876 // Only handle 32 and 64 bit defs for now.
2877 if (DefSize != 32 && DefSize != 64)
2878 return nullptr;
2879
2880 // Don't handle null values using FMOV.
2881 if (ImmOp.getFPImm()->isNullValue())
2882 return nullptr;
2883
2884 // Get the immediate representation for the FMOV.
2885 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
2886 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
2887 : AArch64_AM::getFP64Imm(ImmValAPF);
2888
2889 // If this is -1, it means the immediate can't be represented as the requested
2890 // floating point value. Bail.
2891 if (Imm == -1)
2892 return nullptr;
2893
2894 // Update MI to represent the new FMOV instruction, constrain it, and return.
2895 ImmOp.ChangeToImmediate(Imm);
2896 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
2897 I.setDesc(TII.get(MovOpc));
2898 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2899 return &I;
2900}
2901
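/// Emit a "cset"-style materialization of an integer compare result into
/// \p DefReg, implemented as CSINC Wd, WZR, WZR, <inverted predicate>.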
Jessica Paquette49537bb2019-06-17 18:40:06 +00002902MachineInstr *
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002903AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
Jessica Paquette49537bb2019-06-17 18:40:06 +00002904 MachineIRBuilder &MIRBuilder) const {
2905 // CSINC increments the result when the predicate is false. Invert it.
2906 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2907 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
2908 auto I =
2909 MIRBuilder
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002910 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette49537bb2019-06-17 18:40:06 +00002911 .addImm(InvCC);
2912 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
2913 return &*I;
2914}
2915
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002916bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
2917 MachineIRBuilder MIB(I);
2918 MachineRegisterInfo &MRI = *MIB.getMRI();
2919 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2920
2921 // We want to recognize this pattern:
2922 //
2923 // $z = G_FCMP pred, $x, $y
2924 // ...
2925 // $w = G_SELECT $z, $a, $b
2926 //
2927 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
2928 // some copies/truncs in between.)
2929 //
2930 // If we see this, then we can emit something like this:
2931 //
2932 // fcmp $x, $y
2933 // fcsel $w, $a, $b, pred
2934 //
2935 // Rather than emitting both of the rather long sequences in the standard
2936 // G_FCMP/G_SELECT select methods.
2937
2938 // First, check if the condition is defined by a compare.
2939 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
2940 while (CondDef) {
2941 // We can only fold if all of the defs have one use.
2942 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
2943 return false;
2944
2945 // We can skip over G_TRUNC since the condition is 1-bit.
2946 // Truncating/extending can have no impact on the value.
2947 unsigned Opc = CondDef->getOpcode();
2948 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
2949 break;
2950
Amara Emersond940e202019-06-06 07:33:47 +00002951 // Can't see past copies from physregs.
2952 if (Opc == TargetOpcode::COPY &&
2953 TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
2954 return false;
2955
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002956 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
2957 }
2958
2959 // Is the condition defined by a compare?
2960 // TODO: Handle G_ICMP.
2961 if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
2962 return false;
2963
2964 // Get the condition code for the select.
2965 AArch64CC::CondCode CondCode;
2966 AArch64CC::CondCode CondCode2;
2967 changeFCMPPredToAArch64CC(
2968 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
2969 CondCode2);
2970
2971 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
2972 // instructions to emit the comparison.
2973 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
2974 // unnecessary.
2975 if (CondCode2 != AArch64CC::AL)
2976 return false;
2977
2978 // Make sure we'll be able to select the compare.
2979 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
2980 if (!CmpOpc)
2981 return false;
2982
2983 // Emit a new compare.
2984 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
2985 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2986 Cmp.addUse(CondDef->getOperand(3).getReg());
2987
2988 // Emit the select.
2989 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2990 auto CSel =
2991 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
2992 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
2993 .addImm(CondCode);
2994 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2995 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
2996 I.eraseFromParent();
2997 return true;
2998}
2999
Jessica Paquette49537bb2019-06-17 18:40:06 +00003000bool AArch64InstructionSelector::tryOptCMN(MachineInstr &I) const {
3001 assert(I.getOpcode() == TargetOpcode::G_ICMP && "Expected G_ICMP");
3002 MachineIRBuilder MIRBuilder(I);
3003 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3004 // We want to find this sort of thing:
3005 // x = G_SUB 0, y
3006 // G_ICMP z, x
3007 //
3008 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3009 // e.g:
3010 //
3011 // cmn z, y
3012
3013 // Helper lambda to find the def.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003014 auto FindDef = [&](Register VReg) {
Jessica Paquette49537bb2019-06-17 18:40:06 +00003015 MachineInstr *Def = MRI.getVRegDef(VReg);
3016 while (Def) {
3017 if (Def->getOpcode() != TargetOpcode::COPY)
3018 break;
3019 // Copies can be from physical registers. If we hit this, we're done.
3020 if (TargetRegisterInfo::isPhysicalRegister(Def->getOperand(1).getReg()))
3021 break;
3022 Def = MRI.getVRegDef(Def->getOperand(1).getReg());
3023 }
3024 return Def;
3025 };
3026
3027 // Helper lambda to detect the subtract followed by the compare.
3028 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3029 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3030 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3031 return false;
3032
3033 // Need to make sure NZCV is the same at the end of the transformation.
3034 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3035 return false;
3036
3037 // We want to match against SUBs.
3038 if (DefMI->getOpcode() != TargetOpcode::G_SUB)
3039 return false;
3040
3041 // Make sure that we're getting
3042 // x = G_SUB 0, y
3043 auto ValAndVReg =
3044 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3045 if (!ValAndVReg || ValAndVReg->Value != 0)
3046 return false;
3047
3048 // This can safely be represented as a CMN.
3049 return true;
3050 };
3051
3052 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
3053 MachineInstr *LHSDef = FindDef(I.getOperand(2).getReg());
3054 MachineInstr *RHSDef = FindDef(I.getOperand(3).getReg());
3055 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
3056 (CmpInst::Predicate)I.getOperand(1).getPredicate());
3057 bool DidFold = false;
3058 if (IsCMN(LHSDef, CC)) {
3059 // We're doing this:
3060 //
3061 // Given:
3062 //
3063 // x = G_SUB 0, y
3064 // G_ICMP x, z
3065 //
3066 // Update the G_ICMP:
3067 //
3068 // G_ICMP y, z
3069 I.getOperand(2).setReg(LHSDef->getOperand(2).getReg());
3070 DidFold = true;
3071 } else if (IsCMN(RHSDef, CC)) {
3072 // Same idea here, but with the RHS of the compare instead:
3073 //
3074 // Given:
3075 //
3076 // x = G_SUB 0, y
3077 // G_ICMP z, x
3078 //
3079 // Update the G_ICMP:
3080 //
3081 // G_ICMP z, y
3082 I.getOperand(3).setReg(RHSDef->getOperand(2).getReg());
3083 DidFold = true;
3084 }
3085
3086 if (DidFold) {
3087 // We can fold. Emit a CMN.
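    // (CMN is ADDS with the zero register as the destination, so the compare
    // against a negated value becomes ADDS {W,X}ZR, LHS, RHS.)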
3088 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3089 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3090 bool Is32Bit =
3091 (MRI.getType(I.getOperand(2).getReg()).getSizeInBits() == 32);
3092 auto ImmFns = selectArithImmed(I.getOperand(3));
3093 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003094 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
Jessica Paquette49537bb2019-06-17 18:40:06 +00003095
3096 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {I.getOperand(2).getReg()});
3097
3098 // If we matched a valid constant immediate, add those operands.
3099 if (ImmFns) {
3100 for (auto &RenderFn : *ImmFns)
3101 RenderFn(CmpMI);
3102 } else {
3103 CmpMI.addUse(I.getOperand(3).getReg());
3104 }
3105
3106 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3107
3108 // Add a CSet after the CMN.
3109 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
3110 MIRBuilder);
3111 I.eraseFromParent();
3112 }
3113
3114 return DidFold;
3115}
3116
Amara Emerson761ca2e2019-03-19 21:43:05 +00003117bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3118 // Try to match a vector splat operation into a dup instruction.
3119 // We're looking for this pattern:
3120 // %scalar:gpr(s64) = COPY $x0
3121 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3122 // %cst0:gpr(s32) = G_CONSTANT i32 0
3123 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3124 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3125 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3126 // %zerovec(<2 x s32>)
3127 //
3128 // ...into:
3129 // %splat = DUP %scalar
3130 // We use the regbank of the scalar to determine which kind of dup to use.
3131 MachineIRBuilder MIB(I);
3132 MachineRegisterInfo &MRI = *MIB.getMRI();
3133 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3134 using namespace TargetOpcode;
3135 using namespace MIPatternMatch;
3136
3137 // Begin matching the insert.
3138 auto *InsMI =
3139 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
3140 if (!InsMI)
3141 return false;
3142 // Match the undef vector operand.
3143 auto *UndefMI =
3144 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
3145 if (!UndefMI)
3146 return false;
3147 // Match the scalar being splatted.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003148 Register ScalarReg = InsMI->getOperand(2).getReg();
Amara Emerson761ca2e2019-03-19 21:43:05 +00003149 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3150 // Match the index constant 0.
3151 int64_t Index = 0;
3152 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3153 return false;
3154
3155 // The shuffle's second operand doesn't matter if the mask is all zero.
3156 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
3157 if (!ZeroVec)
3158 return false;
3159 int64_t Zero = 0;
3160 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3161 return false;
3162 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
3163 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3164 return false; // This wasn't an all zeros vector.
3165 }
3166
3167 // We're done, now find out what kind of splat we need.
3168 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3169 LLT EltTy = VecTy.getElementType();
3170 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3171 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3172 return false;
3173 }
3174 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3175 static const unsigned OpcTable[2][2] = {
3176 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3177 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3178 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3179
3180 // For FP splats, we need to widen the scalar reg via undef too.
3181 if (IsFP) {
3182 MachineInstr *Widen = emitScalarToVector(
3183 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3184 if (!Widen)
3185 return false;
3186 ScalarReg = Widen->getOperand(0).getReg();
3187 }
3188 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3189 if (IsFP)
3190 Dup.addImm(0);
3191 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3192 I.eraseFromParent();
3193 return true;
3194}
3195
3196bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3197 if (TM.getOptLevel() == CodeGenOpt::None)
3198 return false;
3199 if (tryOptVectorDup(I))
3200 return true;
3201 return false;
3202}
3203
Amara Emerson1abe05c2019-02-21 20:20:16 +00003204bool AArch64InstructionSelector::selectShuffleVector(
3205 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003206 if (tryOptVectorShuffle(I))
3207 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003208 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003209 Register Src1Reg = I.getOperand(1).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003210 const LLT Src1Ty = MRI.getType(Src1Reg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003211 Register Src2Reg = I.getOperand(2).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003212 const LLT Src2Ty = MRI.getType(Src2Reg);
3213
3214 MachineBasicBlock &MBB = *I.getParent();
3215 MachineFunction &MF = *MBB.getParent();
3216 LLVMContext &Ctx = MF.getFunction().getContext();
3217
3218 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3219 // operand, it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00003220 // find the mask indices. If the mask element is undef, then
3221 // collectShuffleMaskIndices() will add a None entry for that index into
3222 // the list.
3223 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003224 collectShuffleMaskIndices(I, MRI, Mask);
3225 assert(!Mask.empty() && "Expected to find mask indices");
3226
3227 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3228 // it's originated from a <1 x T> type. Those should have been lowered into
3229 // G_BUILD_VECTOR earlier.
3230 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3231 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3232 return false;
3233 }
3234
3235 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3236
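  // TBL selects individual bytes from the concatenated sources, so each
  // element-granular mask index is expanded below into BytesPerElt
  // consecutive byte offsets.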
3237 SmallVector<Constant *, 64> CstIdxs;
Amara Emerson2806fd02019-04-12 21:31:21 +00003238 for (auto &MaybeVal : Mask) {
3239 // For now, any undef indexes we'll just assume to be 0. This should be
3240 // optimized in future, e.g. to select DUP etc.
3241 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003242 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3243 unsigned Offset = Byte + Val * BytesPerElt;
3244 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3245 }
3246 }
3247
Amara Emerson8acb0d92019-03-04 19:16:00 +00003248 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003249
3250 // Use a constant pool to load the index vector for TBL.
3251 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003252 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3253 if (!IndexLoad) {
3254 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3255 return false;
3256 }
3257
Amara Emerson8acb0d92019-03-04 19:16:00 +00003258 if (DstTy.getSizeInBits() != 128) {
3259 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3260 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00003261 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003262 if (!Concat) {
3263 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3264 return false;
3265 }
3266
3267 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
3268 IndexLoad =
3269 emitScalarToVector(64, &AArch64::FPR128RegClass,
3270 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3271
3272 auto TBL1 = MIRBuilder.buildInstr(
3273 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3274 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3275 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3276
Amara Emerson3739a202019-03-15 21:59:50 +00003277 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00003278 MIRBuilder
3279 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3280 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003281 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3282 I.eraseFromParent();
3283 return true;
3284 }
3285
Amara Emerson1abe05c2019-02-21 20:20:16 +00003286 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3287 // Q registers for regalloc.
3288 auto RegSeq = MIRBuilder
3289 .buildInstr(TargetOpcode::REG_SEQUENCE,
3290 {&AArch64::QQRegClass}, {Src1Reg})
3291 .addImm(AArch64::qsub0)
3292 .addUse(Src2Reg)
3293 .addImm(AArch64::qsub1);
3294
3295 auto TBL2 =
3296 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3297 {RegSeq, IndexLoad->getOperand(0).getReg()});
3298 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3299 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3300 I.eraseFromParent();
3301 return true;
3302}
3303
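/// Emit an INSvi* instruction that inserts \p EltReg into lane \p LaneIdx of
/// \p SrcReg, creating a fresh FPR128 destination if \p DstReg is not given.
/// Element values on the FPR bank are first widened with emitScalarToVector.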
Jessica Paquette16d67a32019-03-13 23:22:23 +00003304MachineInstr *AArch64InstructionSelector::emitLaneInsert(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003305 Optional<Register> DstReg, Register SrcReg, Register EltReg,
Jessica Paquette16d67a32019-03-13 23:22:23 +00003306 unsigned LaneIdx, const RegisterBank &RB,
3307 MachineIRBuilder &MIRBuilder) const {
3308 MachineInstr *InsElt = nullptr;
3309 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3310 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3311
3312 // Create a register to define with the insert if one wasn't passed in.
3313 if (!DstReg)
3314 DstReg = MRI.createVirtualRegister(DstRC);
3315
3316 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3317 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3318
3319 if (RB.getID() == AArch64::FPRRegBankID) {
3320 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3321 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3322 .addImm(LaneIdx)
3323 .addUse(InsSub->getOperand(0).getReg())
3324 .addImm(0);
3325 } else {
3326 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3327 .addImm(LaneIdx)
3328 .addUse(EltReg);
3329 }
3330
3331 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3332 return InsElt;
3333}
3334
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003335bool AArch64InstructionSelector::selectInsertElt(
3336 MachineInstr &I, MachineRegisterInfo &MRI) const {
3337 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3338
3339 // Get information on the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003340 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003341 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003342 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003343
3344 // Get information on the element we want to insert into the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003345 Register EltReg = I.getOperand(2).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003346 const LLT EltTy = MRI.getType(EltReg);
3347 unsigned EltSize = EltTy.getSizeInBits();
3348 if (EltSize < 16 || EltSize > 64)
3349 return false; // Don't support all element types yet.
3350
3351 // Find the definition of the index. Bail out if it's not defined by a
3352 // G_CONSTANT.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003353 Register IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00003354 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3355 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003356 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00003357 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003358
3359 // Perform the lane insert.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003360 Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003361 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3362 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003363
3364 if (VecSize < 128) {
3365 // If the vector we're inserting into is smaller than 128 bits, widen it
3366 // to 128 to do the insert.
3367 MachineInstr *ScalarToVec = emitScalarToVector(
3368 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3369 if (!ScalarToVec)
3370 return false;
3371 SrcReg = ScalarToVec->getOperand(0).getReg();
3372 }
3373
3374 // Create an insert into a new FPR128 register.
3375 // Note that if our vector is already 128 bits, we end up emitting an extra
3376 // register.
3377 MachineInstr *InsMI =
3378 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3379
3380 if (VecSize < 128) {
3381 // If we had to widen to perform the insert, then we have to demote back to
3382 // the original size to get the result we want.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003383 Register DemoteVec = InsMI->getOperand(0).getReg();
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003384 const TargetRegisterClass *RC =
3385 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3386 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3387 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3388 return false;
3389 }
3390 unsigned SubReg = 0;
3391 if (!getSubRegForClass(RC, TRI, SubReg))
3392 return false;
3393 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3394 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3395                        << ")\n");
3396 return false;
3397 }
3398 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3399 .addReg(DemoteVec, 0, SubReg);
3400 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3401 } else {
3402 // No widening needed.
3403 InsMI->getOperand(0).setReg(DstReg);
3404 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3405 }
3406
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003407 I.eraseFromParent();
3408 return true;
3409}
3410
Amara Emerson5ec14602018-12-10 18:44:58 +00003411bool AArch64InstructionSelector::selectBuildVector(
3412 MachineInstr &I, MachineRegisterInfo &MRI) const {
3413 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3414 // Until we port more of the optimized selections, for now just use a vector
3415 // insert sequence.
3416 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3417 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3418 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003419 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003420 return false; // Don't support all element types yet.
3421 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003422 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003423
3424 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003425 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003426 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3427 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003428 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003429 return false;
3430
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003431 Register DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003432 unsigned DstSize = DstTy.getSizeInBits();
3433
3434 // Keep track of the last MI we inserted. Later on, we might be able to save
3435 // a copy using it.
3436 MachineInstr *PrevMI = nullptr;
3437 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003438 // Note that if we don't do a subregister copy, we can end up making an
3439 // extra register.
3440 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3441 MIRBuilder);
3442 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003443 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003444
3445 // If DstTy's size in bits is less than 128, then emit a subregister copy
3446 // from DstVec to the last register we've defined.
3447 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003448 // Force this to be FPR using the destination vector.
3449 const TargetRegisterClass *RC =
3450 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003451 if (!RC)
3452 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003453 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3454 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3455 return false;
3456 }
3457
3458 unsigned SubReg = 0;
3459 if (!getSubRegForClass(RC, TRI, SubReg))
3460 return false;
3461 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3462 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3463                        << ")\n");
3464 return false;
3465 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003466
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003467 Register Reg = MRI.createVirtualRegister(RC);
3468 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003469
Amara Emerson86271782019-03-18 19:20:10 +00003470 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3471 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003472 MachineOperand &RegOp = I.getOperand(1);
3473 RegOp.setReg(Reg);
3474 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3475 } else {
3476 // We don't need a subregister copy. Save a copy by re-using the
3477 // destination register on the final insert.
3478 assert(PrevMI && "PrevMI was null?");
3479 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3480 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3481 }
3482
Amara Emerson5ec14602018-12-10 18:44:58 +00003483 I.eraseFromParent();
3484 return true;
3485}
3486
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003487/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3488/// ID if it exists, and 0 otherwise.
3489static unsigned findIntrinsicID(MachineInstr &I) {
3490 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3491 return Op.isIntrinsicID();
3492 });
3493 if (IntrinOp == I.operands_end())
3494 return 0;
3495 return IntrinOp->getIntrinsicID();
3496}
3497
Jessica Paquette22c62152019-04-02 19:57:26 +00003498/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3499/// intrinsic.
3500static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3501 switch (NumBytesToStore) {
3502 // TODO: 1, 2, and 4 byte stores.
3503 case 8:
3504 return AArch64::STLXRX;
3505 default:
3506 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3507 << NumBytesToStore << ")\n");
3508 break;
3509 }
3510 return 0;
3511}
3512
3513bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3514 MachineInstr &I, MachineRegisterInfo &MRI) const {
3515 // Find the intrinsic ID.
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003516 unsigned IntrinID = findIntrinsicID(I);
3517 if (!IntrinID)
Jessica Paquette22c62152019-04-02 19:57:26 +00003518 return false;
Jessica Paquette22c62152019-04-02 19:57:26 +00003519 MachineIRBuilder MIRBuilder(I);
3520
3521 // Select the instruction.
3522 switch (IntrinID) {
3523 default:
3524 return false;
3525 case Intrinsic::trap:
3526 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3527 break;
Tom Tan7ecb5142019-06-21 23:38:05 +00003528 case Intrinsic::debugtrap:
3529 if (!STI.isTargetWindows())
3530 return false;
3531 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3532 break;
Jessica Paquette22c62152019-04-02 19:57:26 +00003533 case Intrinsic::aarch64_stlxr:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003534 Register StatReg = I.getOperand(0).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003535 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3536 "Status register must be 32 bits!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003537 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003538
3539 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3540 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3541 return false;
3542 }
3543
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003544 Register PtrReg = I.getOperand(3).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003545 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3546
3547 // Expect only one memory operand.
3548 if (!I.hasOneMemOperand())
3549 return false;
3550
3551 const MachineMemOperand *MemOp = *I.memoperands_begin();
3552 unsigned NumBytesToStore = MemOp->getSize();
3553 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3554 if (!Opc)
3555 return false;
3556
3557 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3558 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3559 }
3560
3561 I.eraseFromParent();
3562 return true;
3563}
3564
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003565bool AArch64InstructionSelector::selectIntrinsic(
3566 MachineInstr &I, MachineRegisterInfo &MRI) const {
3567 unsigned IntrinID = findIntrinsicID(I);
3568 if (!IntrinID)
3569 return false;
3570 MachineIRBuilder MIRBuilder(I);
3571
3572 switch (IntrinID) {
3573 default:
3574 break;
3575 case Intrinsic::aarch64_crypto_sha1h:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003576 Register DstReg = I.getOperand(0).getReg();
3577 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003578
3579 // FIXME: Should this be an assert?
3580 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3581 MRI.getType(SrcReg).getSizeInBits() != 32)
3582 return false;
3583
3584 // The operation has to happen on FPRs. Set up some new FPR registers for
3585 // the source and destination if they are on GPRs.
3586 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3587 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3588 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3589
3590 // Make sure the copy ends up getting constrained properly.
3591 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3592 AArch64::GPR32RegClass, MRI);
3593 }
3594
3595 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3596 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3597
3598 // Actually insert the instruction.
3599 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3600 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3601
3602 // Did we create a new register for the destination?
3603 if (DstReg != I.getOperand(0).getReg()) {
3604 // Yep. Copy the result of the instruction back into the original
3605 // destination.
3606 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3607 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3608 AArch64::GPR32RegClass, MRI);
3609 }
3610
3611 I.eraseFromParent();
3612 return true;
3613 }
3614 return false;
3615}
3616
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003617/// SelectArithImmed - Select an immediate value that can be represented as
3618/// a 12-bit value shifted left by either 0 or 12. If so, return the operand
3619/// renderers for the 12-bit value and the shifter operand; otherwise None.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003620InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00003621AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003622 MachineInstr &MI = *Root.getParent();
3623 MachineBasicBlock &MBB = *MI.getParent();
3624 MachineFunction &MF = *MBB.getParent();
3625 MachineRegisterInfo &MRI = MF.getRegInfo();
3626
3627 // This function is called from the addsub_shifted_imm ComplexPattern,
3628 // which lists [imm] as the list of opcode it's interested in, however
3629 // we still need to check whether the operand is actually an immediate
3630 // here because the ComplexPattern opcode list is only used in
3631 // root-level opcode matching.
3632 uint64_t Immed;
3633 if (Root.isImm())
3634 Immed = Root.getImm();
3635 else if (Root.isCImm())
3636 Immed = Root.getCImm()->getZExtValue();
3637 else if (Root.isReg()) {
3638 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3639 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003640 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003641 MachineOperand &Op1 = Def->getOperand(1);
3642 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003643 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003644 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003645 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003646 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003647
3648 unsigned ShiftAmt;
3649
3650 if (Immed >> 12 == 0) {
3651 ShiftAmt = 0;
3652 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3653 ShiftAmt = 12;
3654 Immed = Immed >> 12;
3655 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003656 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003657
3658 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003659 return {{
3660 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3661 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3662 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003663}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003664
Daniel Sandersea8711b2017-10-16 03:36:29 +00003665/// Select a "register plus unscaled signed 9-bit immediate" address. This
3666/// should only match when there is an offset that is not valid for a scaled
3667/// immediate addressing mode. The "Size" argument is the size in bytes of the
3668/// memory reference, which is needed here to know what is valid for a scaled
3669/// immediate.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003670InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003671AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3672 unsigned Size) const {
3673 MachineRegisterInfo &MRI =
3674 Root.getParent()->getParent()->getParent()->getRegInfo();
3675
3676 if (!Root.isReg())
3677 return None;
3678
3679 if (!isBaseWithConstantOffset(Root, MRI))
3680 return None;
3681
3682 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3683 if (!RootDef)
3684 return None;
3685
3686 MachineOperand &OffImm = RootDef->getOperand(2);
3687 if (!OffImm.isReg())
3688 return None;
3689 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3690 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3691 return None;
3692 int64_t RHSC;
3693 MachineOperand &RHSOp1 = RHS->getOperand(1);
3694 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3695 return None;
3696 RHSC = RHSOp1.getCImm()->getSExtValue();
3697
3698 // If the offset is valid as a scaled immediate, don't match here.
3699 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3700 return None;
3701 if (RHSC >= -256 && RHSC < 256) {
3702 MachineOperand &Base = RootDef->getOperand(1);
3703 return {{
3704 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3705 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3706 }};
3707 }
3708 return None;
3709}
3710
3711/// Select a "register plus scaled unsigned 12-bit immediate" address. The
3712/// "Size" argument is the size in bytes of the memory reference, which
3713/// determines the scale.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003714InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003715AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3716 unsigned Size) const {
3717 MachineRegisterInfo &MRI =
3718 Root.getParent()->getParent()->getParent()->getRegInfo();
3719
3720 if (!Root.isReg())
3721 return None;
3722
3723 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3724 if (!RootDef)
3725 return None;
3726
3727 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3728 return {{
3729 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3730 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3731 }};
3732 }
3733
3734 if (isBaseWithConstantOffset(Root, MRI)) {
3735 MachineOperand &LHS = RootDef->getOperand(1);
3736 MachineOperand &RHS = RootDef->getOperand(2);
3737 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3738 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3739 if (LHSDef && RHSDef) {
3740 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3741 unsigned Scale = Log2_32(Size);
3742 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3743 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00003744 return {{
3745 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3746 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3747 }};
3748
Daniel Sandersea8711b2017-10-16 03:36:29 +00003749 return {{
3750 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3751 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3752 }};
3753 }
3754 }
3755 }
3756
3757 // Before falling back to our general case, check if the unscaled
3758 // instructions can handle this. If so, that's preferable.
3759 if (selectAddrModeUnscaled(Root, Size).hasValue())
3760 return None;
3761
3762 return {{
3763 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3764 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3765 }};
3766}
3767
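/// Add the value of a G_CONSTANT operand to \p MIB as an immediate.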
Volkan Kelesf7f25682018-01-16 18:44:05 +00003768void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3769 const MachineInstr &MI) const {
3770 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3771 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3772 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3773 assert(CstVal && "Expected constant value");
3774 MIB.addImm(CstVal.getValue());
3775}
3776
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003777namespace llvm {
3778InstructionSelector *
3779createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3780 AArch64Subtarget &Subtarget,
3781 AArch64RegisterBankInfo &RBI) {
3782 return new AArch64InstructionSelector(TM, Subtarget, RBI);
3783}
3784}