//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register;
  // the instruction defining that register is returned.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check whether a register's defining instruction has a
  // given opcode; if so, return that instruction, otherwise return nullptr.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the AArch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types; the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to move bits around, and as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

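/// Select the fixed AArch64 opcode for a scalar FP<->integer conversion
/// (G_SITOFP, G_UITOFP, G_FPTOSI, G_FPTOUI), given 32- or 64-bit scalar
/// source and destination types.
/// \returns \p GenericOpc if the combination is not handled here.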
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

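/// Pick the CSEL/FCSEL variant (W/X or S/D form) for a G_SELECT based on the
/// register bank and type of the destination; returns 0 for unsupported types.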
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
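  // Rows: whether the +0.0 operand can be folded into the compare (register
  // form vs. compare-against-immediate-zero form). Columns: 32- vs. 64-bit
  // operands.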
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

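/// Map an IR FP predicate onto AArch64 condition codes. Some predicates
/// (e.g. FCMP_ONE, FCMP_UEQ) have no single-condition encoding and need a
/// second code, returned in \p CondCode2; it is left as AL when unused.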
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

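/// Try to fold a G_ICMP against zero feeding a G_BRCOND into a single
/// CBZ/CBNZ (W or X form). Returns false if the pattern doesn't match, in
/// which case the generic G_BRCOND handling is used instead.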
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction; instead, the
  // shift-left-by-register instruction takes a signed shift amount, where
  // negative values specify a right shift.

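  // So the lowering below is: NEG the shift amount, then SSHL by the negated
  // value.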
  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

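/// On Darwin, va_start only needs to store the address of the stack area
/// holding the variadic arguments into the va_list slot: an ADDXri of the
/// varargs frame index followed by a STRXui through the list pointer.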
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

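/// Materialize a GlobalValue or BlockAddress address as a full 64-bit
/// MOVZ + MOVK sequence, one 16-bit chunk per instruction; roughly (sketch,
/// exact relocation specifiers depend on \p OpFlags):
///   movz xD, #:abs_g0_nc:sym
///   movk xD, #:abs_g1_nc:sym, lsl #16
///   movk xD, #:abs_g2_nc:sym, lsl #32
///   movk xD, #:abs_g3:sym, lsl #48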
Amara Emerson1e8c1642018-07-31 00:09:02 +0000999void AArch64InstructionSelector::materializeLargeCMVal(
1000 MachineInstr &I, const Value *V, unsigned char OpFlags) const {
1001 MachineBasicBlock &MBB = *I.getParent();
1002 MachineFunction &MF = *MBB.getParent();
1003 MachineRegisterInfo &MRI = MF.getRegInfo();
1004 MachineIRBuilder MIB(I);
1005
Aditya Nandakumarcef44a22018-12-11 00:48:50 +00001006 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
Amara Emerson1e8c1642018-07-31 00:09:02 +00001007 MovZ->addOperand(MF, I.getOperand(1));
1008 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1009 AArch64II::MO_NC);
1010 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1011 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1012
1013 auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
1014 unsigned ForceDstReg) {
1015 unsigned DstReg = ForceDstReg
1016 ? ForceDstReg
1017 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1018 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1019 if (auto *GV = dyn_cast<GlobalValue>(V)) {
1020 MovI->addOperand(MF, MachineOperand::CreateGA(
1021 GV, MovZ->getOperand(1).getOffset(), Flags));
1022 } else {
1023 MovI->addOperand(
1024 MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1025 MovZ->getOperand(1).getOffset(), Flags));
1026 }
1027 MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1028 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1029 return DstReg;
1030 };
Aditya Nandakumarfef76192019-02-05 22:14:40 +00001031 unsigned DstReg = BuildMovK(MovZ.getReg(0),
Amara Emerson1e8c1642018-07-31 00:09:02 +00001032 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1033 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1034 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1035 return;
1036}
1037
Daniel Sandersf76f3152017-11-16 00:46:35 +00001038bool AArch64InstructionSelector::select(MachineInstr &I,
1039 CodeGenCoverage &CoverageInfo) const {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001040 assert(I.getParent() && "Instruction should be in a basic block!");
1041 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1042
1043 MachineBasicBlock &MBB = *I.getParent();
1044 MachineFunction &MF = *MBB.getParent();
1045 MachineRegisterInfo &MRI = MF.getRegInfo();
1046
Tim Northovercdf23f12016-10-31 18:30:59 +00001047 unsigned Opcode = I.getOpcode();
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001048 // G_PHI requires same handling as PHI
1049 if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
Tim Northovercdf23f12016-10-31 18:30:59 +00001050 // Certain non-generic instructions also need some special handling.
1051
1052 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1053 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001054
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001055 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
Tim Northover7d88da62016-11-08 00:34:06 +00001056 const unsigned DefReg = I.getOperand(0).getReg();
1057 const LLT DefTy = MRI.getType(DefReg);
1058
1059 const TargetRegisterClass *DefRC = nullptr;
1060 if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
1061 DefRC = TRI.getRegClass(DefReg);
1062 } else {
1063 const RegClassOrRegBank &RegClassOrBank =
1064 MRI.getRegClassOrRegBank(DefReg);
1065
1066 DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1067 if (!DefRC) {
1068 if (!DefTy.isValid()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001069 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
Tim Northover7d88da62016-11-08 00:34:06 +00001070 return false;
1071 }
1072 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1073 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1074 if (!DefRC) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001075 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
Tim Northover7d88da62016-11-08 00:34:06 +00001076 return false;
1077 }
1078 }
1079 }
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001080 I.setDesc(TII.get(TargetOpcode::PHI));
Tim Northover7d88da62016-11-08 00:34:06 +00001081
1082 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1083 }
1084
1085 if (I.isCopy())
Tim Northovercdf23f12016-10-31 18:30:59 +00001086 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001087
1088 return true;
Tim Northovercdf23f12016-10-31 18:30:59 +00001089 }
1090
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001091
1092 if (I.getNumOperands() != I.getNumExplicitOperands()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001093 LLVM_DEBUG(
1094 dbgs() << "Generic instruction has unexpected implicit operands\n");
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001095 return false;
1096 }
1097
Daniel Sandersf76f3152017-11-16 00:46:35 +00001098 if (selectImpl(I, CoverageInfo))
Ahmed Bougacha36f70352016-12-21 23:26:20 +00001099 return true;
1100
Tim Northover32a078a2016-09-15 10:09:59 +00001101 LLT Ty =
1102 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001103
Amara Emerson3739a202019-03-15 21:59:50 +00001104 MachineIRBuilder MIB(I);
1105
Tim Northover69271c62016-10-12 22:49:11 +00001106 switch (Opcode) {
Tim Northover5e3dbf32016-10-12 22:49:01 +00001107 case TargetOpcode::G_BRCOND: {
1108 if (Ty.getSizeInBits() > 32) {
1109 // We shouldn't need this on AArch64, but it would be implemented as an
1110 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1111 // bit being tested is < 32.
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001112 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1113 << ", expected at most 32-bits");
Tim Northover5e3dbf32016-10-12 22:49:01 +00001114 return false;
1115 }
1116
1117 const unsigned CondReg = I.getOperand(0).getReg();
1118 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1119
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001120 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1121 // instructions will not be produced, as they are conditional branch
1122 // instructions that do not set flags.
1123 bool ProduceNonFlagSettingCondBr =
1124 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1125 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
Ahmed Bougacha641cb202017-03-27 16:35:31 +00001126 return true;
1127
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001128 if (ProduceNonFlagSettingCondBr) {
1129 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1130 .addUse(CondReg)
1131 .addImm(/*bit offset=*/0)
1132 .addMBB(DestMBB);
Tim Northover5e3dbf32016-10-12 22:49:01 +00001133
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001134 I.eraseFromParent();
1135 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1136 } else {
1137 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1138 .addDef(AArch64::WZR)
1139 .addUse(CondReg)
1140 .addImm(1);
1141 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1142 auto Bcc =
1143 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1144 .addImm(AArch64CC::EQ)
1145 .addMBB(DestMBB);
1146
1147 I.eraseFromParent();
1148 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1149 }
Tim Northover5e3dbf32016-10-12 22:49:01 +00001150 }
1151
Kristof Beyls65a12c02017-01-30 09:13:18 +00001152 case TargetOpcode::G_BRINDIRECT: {
1153 I.setDesc(TII.get(AArch64::BR));
1154 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1155 }
1156
Jessica Paquette67ab9eb2019-04-26 18:00:01 +00001157 case TargetOpcode::G_BSWAP: {
1158 // Handle vector types for G_BSWAP directly.
1159 unsigned DstReg = I.getOperand(0).getReg();
1160 LLT DstTy = MRI.getType(DstReg);
1161
1162 // We should only get vector types here; everything else is handled by the
1163 // importer right now.
1164 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1165 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1166 return false;
1167 }
1168
1169 // Only handle 4 and 2 element vectors for now.
1170 // TODO: 16-bit elements.
1171 unsigned NumElts = DstTy.getNumElements();
1172 if (NumElts != 4 && NumElts != 2) {
1173 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1174 return false;
1175 }
1176
1177 // Choose the correct opcode for the supported types. Right now, that's
1178 // v2s32, v4s32, and v2s64.
1179 unsigned Opc = 0;
1180 unsigned EltSize = DstTy.getElementType().getSizeInBits();
1181 if (EltSize == 32)
1182 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1183 : AArch64::REV32v16i8;
1184 else if (EltSize == 64)
1185 Opc = AArch64::REV64v16i8;
1186
1187 // We should always get something by the time we get here...
1188 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1189
1190 I.setDesc(TII.get(Opc));
1191 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1192 }
1193
Tim Northover4494d692016-10-18 19:47:57 +00001194 case TargetOpcode::G_FCONSTANT:
Tim Northover4edc60d2016-10-10 21:49:42 +00001195 case TargetOpcode::G_CONSTANT: {
Tim Northover4494d692016-10-18 19:47:57 +00001196 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1197
1198 const LLT s32 = LLT::scalar(32);
1199 const LLT s64 = LLT::scalar(64);
1200 const LLT p0 = LLT::pointer(0, 64);
1201
1202 const unsigned DefReg = I.getOperand(0).getReg();
1203 const LLT DefTy = MRI.getType(DefReg);
1204 const unsigned DefSize = DefTy.getSizeInBits();
1205 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1206
1207 // FIXME: Redundant check, but even less readable when factored out.
1208 if (isFP) {
1209 if (Ty != s32 && Ty != s64) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001210 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1211 << " constant, expected: " << s32 << " or " << s64
1212 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001213 return false;
1214 }
1215
1216 if (RB.getID() != AArch64::FPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001217 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1218 << " constant on bank: " << RB
1219 << ", expected: FPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001220 return false;
1221 }
Daniel Sanders11300ce2017-10-13 21:28:03 +00001222
1223 // The case when we have 0.0 is covered by tablegen. Reject it here so we
1224 // can be sure tablegen works correctly and isn't rescued by this code.
1225 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1226 return false;
Tim Northover4494d692016-10-18 19:47:57 +00001227 } else {
Daniel Sanders05540042017-08-08 10:44:31 +00001228 // s32 and s64 are covered by tablegen.
1229 if (Ty != p0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001230 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1231 << " constant, expected: " << s32 << ", " << s64
1232 << ", or " << p0 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001233 return false;
1234 }
1235
1236 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001237 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1238 << " constant on bank: " << RB
1239 << ", expected: GPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001240 return false;
1241 }
1242 }
1243
1244 const unsigned MovOpc =
1245 DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
1246
Tim Northover4494d692016-10-18 19:47:57 +00001247 if (isFP) {
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001248 // Either emit a FMOV, or emit a copy to emit a normal mov.
Tim Northover4494d692016-10-18 19:47:57 +00001249 const TargetRegisterClass &GPRRC =
1250 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1251 const TargetRegisterClass &FPRRC =
1252 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1253
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001254 // Can we use a FMOV instruction to represent the immediate?
1255 if (emitFMovForFConstant(I, MRI))
1256 return true;
1257
1258 // Nope. Emit a copy and use a normal mov instead.
Tim Northover4494d692016-10-18 19:47:57 +00001259 const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
1260 MachineOperand &RegOp = I.getOperand(0);
1261 RegOp.setReg(DefGPRReg);
Amara Emerson3739a202019-03-15 21:59:50 +00001262 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1263 MIB.buildCopy({DefReg}, {DefGPRReg});
Tim Northover4494d692016-10-18 19:47:57 +00001264
1265 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001266 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
Tim Northover4494d692016-10-18 19:47:57 +00001267 return false;
1268 }
1269
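      // Replace the FP immediate with its raw bit pattern so this instruction
      // can be rewritten into an integer MOVi32imm/MOVi64imm below; the COPY
      // built above then moves the materialized value into the FPR def.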
1270 MachineOperand &ImmOp = I.getOperand(1);
1271 // FIXME: Is going through int64_t always correct?
1272 ImmOp.ChangeToImmediate(
1273 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001274 } else if (I.getOperand(1).isCImm()) {
Tim Northover9267ac52016-12-05 21:47:07 +00001275 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1276 I.getOperand(1).ChangeToImmediate(Val);
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001277 } else if (I.getOperand(1).isImm()) {
1278 uint64_t Val = I.getOperand(1).getImm();
1279 I.getOperand(1).ChangeToImmediate(Val);
Tim Northover4494d692016-10-18 19:47:57 +00001280 }
1281
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001282 I.setDesc(TII.get(MovOpc));
Tim Northover4494d692016-10-18 19:47:57 +00001283 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1284 return true;
Tim Northover4edc60d2016-10-10 21:49:42 +00001285 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001286 case TargetOpcode::G_EXTRACT: {
1287 LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001288 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Amara Emerson242efdb2018-02-18 17:28:34 +00001289 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001290 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001291    // Larger extracts are vectors; same-size extracts should be something else
1292 // by now (either split up or simplified to a COPY).
1293 if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1294 return false;
1295
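    // UBFM copies the bitfield [imms:immr] of the source into the low bits of
    // the destination, zero-extended. G_EXTRACT's offset operand is reused as
    // immr, and imms is set to offset + width - 1 just below.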
Amara Emersonbc03bae2018-02-18 17:03:02 +00001296 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001297 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1298 Ty.getSizeInBits() - 1);
1299
Amara Emersonbc03bae2018-02-18 17:03:02 +00001300 if (SrcSize < 64) {
1301 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1302 "unexpected G_EXTRACT types");
1303 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1304 }
1305
Tim Northover7b6d66c2017-07-20 22:58:38 +00001306 unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001307 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001308 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1309 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001310 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1311 AArch64::GPR32RegClass, MRI);
1312 I.getOperand(0).setReg(DstReg);
1313
1314 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1315 }
1316
1317 case TargetOpcode::G_INSERT: {
1318 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001319 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1320 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001321    // Larger inserts are vectors; same-size ones should be something else by
1322 // now (split up or turned into COPYs).
1323 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1324 return false;
1325
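    // BFM with immr = (DstSize - LSB) % DstSize and imms = Width - 1 is the
    // expansion of BFI #LSB, #Width: it inserts the low Width bits of the
    // source at bit position LSB of the destination.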
Amara Emersonbc03bae2018-02-18 17:03:02 +00001326 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001327 unsigned LSB = I.getOperand(3).getImm();
1328 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001329 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001330 MachineInstrBuilder(MF, I).addImm(Width - 1);
1331
Amara Emersonbc03bae2018-02-18 17:03:02 +00001332 if (DstSize < 64) {
1333 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1334 "unexpected G_INSERT types");
1335 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1336 }
1337
Tim Northover7b6d66c2017-07-20 22:58:38 +00001338 unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1339 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1340 TII.get(AArch64::SUBREG_TO_REG))
1341 .addDef(SrcReg)
1342 .addImm(0)
1343 .addUse(I.getOperand(2).getReg())
1344 .addImm(AArch64::sub_32);
1345 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1346 AArch64::GPR32RegClass, MRI);
1347 I.getOperand(2).setReg(SrcReg);
1348
1349 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1350 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001351 case TargetOpcode::G_FRAME_INDEX: {
1352 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001353 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001354 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1355 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001356 return false;
1357 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001358 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001359
1360 // MOs for a #0 shifted immediate.
1361 I.addOperand(MachineOperand::CreateImm(0));
1362 I.addOperand(MachineOperand::CreateImm(0));
1363
1364 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1365 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001366
1367 case TargetOpcode::G_GLOBAL_VALUE: {
1368 auto GV = I.getOperand(1).getGlobal();
1369 if (GV->isThreadLocal()) {
1370 // FIXME: we don't support TLS yet.
1371 return false;
1372 }
1373 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001374 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001375 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001376 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001377 } else if (TM.getCodeModel() == CodeModel::Large) {
1378 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001379 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001380 I.eraseFromParent();
1381 return true;
David Green9dd1d452018-08-22 11:31:39 +00001382 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1383 I.setDesc(TII.get(AArch64::ADR));
1384 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001385 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001386 I.setDesc(TII.get(AArch64::MOVaddr));
1387 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1388 MachineInstrBuilder MIB(MF, I);
1389 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1390 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1391 }
1392 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1393 }
1394
Amara Emersond3144a42019-06-06 07:58:37 +00001395 case TargetOpcode::G_ZEXTLOAD:
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001396 case TargetOpcode::G_LOAD:
1397 case TargetOpcode::G_STORE: {
Amara Emersond3144a42019-06-06 07:58:37 +00001398 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1399 MachineIRBuilder MIB(I);
1400
Tim Northover0f140c72016-09-09 11:46:34 +00001401 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001402
Tim Northover5ae83502016-09-15 09:20:34 +00001403 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001404 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1405 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001406 return false;
1407 }
1408
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001409 auto &MemOp = **I.memoperands_begin();
1410 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001411 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001412 return false;
1413 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001414 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001415
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001416 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001417#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001418 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001419 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001420 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1421 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001422 assert(MRI.getType(PtrReg).isPointer() &&
1423 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001424#endif
1425
1426 const unsigned ValReg = I.getOperand(0).getReg();
1427 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1428
1429 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001430 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001431 if (NewOpc == I.getOpcode())
1432 return false;
1433
1434 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001435
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001436 uint64_t Offset = 0;
1437 auto *PtrMI = MRI.getVRegDef(PtrReg);
1438
1439 // Try to fold a GEP into our unsigned immediate addressing mode.
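    // The unsigned-offset forms encode the offset as a 12-bit immediate scaled
    // by the access size, so the byte offset must be size-aligned and fit in
    // 12 bits once divided by the access size.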
1440 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1441 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1442 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001443 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001444 const unsigned Scale = Log2_32(Size);
1445 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1446 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1447 I.getOperand(1).setReg(Ptr2Reg);
1448 PtrMI = MRI.getVRegDef(Ptr2Reg);
1449 Offset = Imm / Size;
1450 }
1451 }
1452 }
1453
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001454 // If we haven't folded anything into our addressing mode yet, try to fold
1455 // a frame index into the base+offset.
1456 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1457 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1458
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001459 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001460
1461 // If we're storing a 0, use WZR/XZR.
1462 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1463 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1464 if (I.getOpcode() == AArch64::STRWui)
1465 I.getOperand(0).setReg(AArch64::WZR);
1466 else if (I.getOpcode() == AArch64::STRXui)
1467 I.getOperand(0).setReg(AArch64::XZR);
1468 }
1469 }
1470
Amara Emersond3144a42019-06-06 07:58:37 +00001471 if (IsZExtLoad) {
1472 // The zextload from a smaller type to i32 should be handled by the importer.
1473 if (MRI.getType(ValReg).getSizeInBits() != 64)
1474 return false;
                           1475       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
                           1476       // and zero-extend with SUBREG_TO_REG.
1477 unsigned LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1478 unsigned DstReg = I.getOperand(0).getReg();
1479 I.getOperand(0).setReg(LdReg);
1480
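      // Writes to a W register implicitly zero bits [63:32] of the X register,
      // so SUBREG_TO_REG is enough to model the 64-bit zero-extension: it just
      // reassembles the 64-bit value around the sub_32 load result.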
1481 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1482 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1483 .addImm(0)
1484 .addUse(LdReg)
1485 .addImm(AArch64::sub_32);
1486 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1487 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1488 MRI);
1489 }
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001490 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1491 }
1492
Tim Northover9dd78f82017-02-08 21:22:25 +00001493 case TargetOpcode::G_SMULH:
1494 case TargetOpcode::G_UMULH: {
1495 // Reject the various things we don't support yet.
1496 if (unsupportedBinOp(I, RBI, MRI, TRI))
1497 return false;
1498
1499 const unsigned DefReg = I.getOperand(0).getReg();
1500 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1501
1502 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001503 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001504 return false;
1505 }
1506
1507 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001508 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1509 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001510 return false;
1511 }
1512
1513 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1514 : AArch64::UMULHrr;
1515 I.setDesc(TII.get(NewOpc));
1516
1517 // Now that we selected an opcode, we need to constrain the register
1518 // operands to use appropriate classes.
1519 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1520 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001521 case TargetOpcode::G_FADD:
1522 case TargetOpcode::G_FSUB:
1523 case TargetOpcode::G_FMUL:
1524 case TargetOpcode::G_FDIV:
1525
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001526 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001527 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1528 return selectVectorASHR(I, MRI);
1529 LLVM_FALLTHROUGH;
1530 case TargetOpcode::G_SHL:
1531 if (Opcode == TargetOpcode::G_SHL &&
1532 MRI.getType(I.getOperand(0).getReg()).isVector())
1533 return selectVectorSHL(I, MRI);
1534 LLVM_FALLTHROUGH;
1535 case TargetOpcode::G_OR:
1536 case TargetOpcode::G_LSHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001537 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001538 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001539 if (unsupportedBinOp(I, RBI, MRI, TRI))
1540 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001541
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001542 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001543
1544 const unsigned DefReg = I.getOperand(0).getReg();
1545 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1546
1547 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1548 if (NewOpc == I.getOpcode())
1549 return false;
1550
1551 I.setDesc(TII.get(NewOpc));
1552 // FIXME: Should the type be always reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001553
1554 // Now that we selected an opcode, we need to constrain the register
1555 // operands to use appropriate classes.
1556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1557 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001558
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001559 case TargetOpcode::G_UADDO: {
1560 // TODO: Support other types.
1561 unsigned OpSize = Ty.getSizeInBits();
1562 if (OpSize != 32 && OpSize != 64) {
1563 LLVM_DEBUG(
1564 dbgs()
                           1565           << "G_UADDO currently only supported for 32 and 64 bit types.\n");
1566 return false;
1567 }
1568
1569 // TODO: Support vectors.
1570 if (Ty.isVector()) {
1571 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1572 return false;
1573 }
1574
1575 // Add and set the set condition flag.
1576 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1577 MachineIRBuilder MIRBuilder(I);
1578 auto AddsMI = MIRBuilder.buildInstr(
1579 AddsOpc, {I.getOperand(0).getReg()},
1580 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1581 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1582
1583 // Now, put the overflow result in the register given by the first operand
1584 // to the G_UADDO. CSINC increments the result when the predicate is false,
1585 // so to get the increment when it's true, we need to use the inverse. In
1586 // this case, we want to increment when carry is set.
1587 auto CsetMI = MIRBuilder
1588 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1589 {AArch64::WZR, AArch64::WZR})
1590 .addImm(getInvertedCondCode(AArch64CC::HS));
1591 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1592 I.eraseFromParent();
1593 return true;
1594 }
1595
Tim Northover398c5f52017-02-14 20:56:29 +00001596 case TargetOpcode::G_PTR_MASK: {
1597 uint64_t Align = I.getOperand(2).getImm();
1598 if (Align >= 64 || Align == 0)
1599 return false;
1600
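    // Clearing the low Align bits aligns the pointer down to a 2^Align-byte
    // boundary; the resulting mask is a contiguous run of high ones, which is
    // encodable as a logical immediate for ANDXri.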
1601 uint64_t Mask = ~((1ULL << Align) - 1);
1602 I.setDesc(TII.get(AArch64::ANDXri));
1603 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1604
1605 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1606 }
Tim Northover037af52c2016-10-31 18:31:09 +00001607 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001608 case TargetOpcode::G_TRUNC: {
1609 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1610 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1611
1612 const unsigned DstReg = I.getOperand(0).getReg();
1613 const unsigned SrcReg = I.getOperand(1).getReg();
1614
1615 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1616 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1617
1618 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001619 LLVM_DEBUG(
1620 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001621 return false;
1622 }
1623
1624 if (DstRB.getID() == AArch64::GPRRegBankID) {
1625 const TargetRegisterClass *DstRC =
1626 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1627 if (!DstRC)
1628 return false;
1629
1630 const TargetRegisterClass *SrcRC =
1631 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1632 if (!SrcRC)
1633 return false;
1634
1635 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1636 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001637 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001638 return false;
1639 }
1640
1641 if (DstRC == SrcRC) {
1642 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001643 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1644 SrcTy == LLT::scalar(64)) {
1645 llvm_unreachable("TableGen can import this case");
1646 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001647 } else if (DstRC == &AArch64::GPR32RegClass &&
1648 SrcRC == &AArch64::GPR64RegClass) {
1649 I.getOperand(1).setSubReg(AArch64::sub_32);
1650 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001651 LLVM_DEBUG(
1652 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001653 return false;
1654 }
1655
1656 I.setDesc(TII.get(TargetOpcode::COPY));
1657 return true;
1658 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1659 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1660 I.setDesc(TII.get(AArch64::XTNv4i16));
1661 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1662 return true;
1663 }
1664 }
1665
1666 return false;
1667 }
1668
Tim Northover3d38b3a2016-10-11 20:50:21 +00001669 case TargetOpcode::G_ANYEXT: {
1670 const unsigned DstReg = I.getOperand(0).getReg();
1671 const unsigned SrcReg = I.getOperand(1).getReg();
1672
Quentin Colombetcb629a82016-10-12 03:57:49 +00001673 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1674 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001675 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1676 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001677 return false;
1678 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001679
Quentin Colombetcb629a82016-10-12 03:57:49 +00001680 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1681 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001682 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1683 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001684 return false;
1685 }
1686
1687 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1688
1689 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001690 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001691 return false;
1692 }
1693
Quentin Colombetcb629a82016-10-12 03:57:49 +00001694 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001695 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1696 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001697 return false;
1698 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001699 // At this point G_ANYEXT is just like a plain COPY, but we need
                           1700     // to explicitly form the 64-bit value when widening to 64 bits.
1701 if (DstSize > 32) {
1702 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1703 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1704 .addDef(ExtSrc)
1705 .addImm(0)
1706 .addUse(SrcReg)
1707 .addImm(AArch64::sub_32);
1708 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001709 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001710 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001711 }
1712
1713 case TargetOpcode::G_ZEXT:
1714 case TargetOpcode::G_SEXT: {
1715 unsigned Opcode = I.getOpcode();
1716 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1717 SrcTy = MRI.getType(I.getOperand(1).getReg());
1718 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1719 const unsigned DefReg = I.getOperand(0).getReg();
1720 const unsigned SrcReg = I.getOperand(1).getReg();
1721 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1722
1723 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001724 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1725 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001726 return false;
1727 }
1728
1729 MachineInstr *ExtI;
1730 if (DstTy == LLT::scalar(64)) {
1731 // FIXME: Can we avoid manually doing this?
1732 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001733 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1734 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001735 return false;
1736 }
1737
1738 const unsigned SrcXReg =
1739 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1740 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1741 .addDef(SrcXReg)
1742 .addImm(0)
1743 .addUse(SrcReg)
1744 .addImm(AArch64::sub_32);
1745
1746 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1747 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1748 .addDef(DefReg)
1749 .addUse(SrcXReg)
1750 .addImm(0)
1751 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001752 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001753 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1754 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1755 .addDef(DefReg)
1756 .addUse(SrcReg)
1757 .addImm(0)
1758 .addImm(SrcTy.getSizeInBits() - 1);
1759 } else {
1760 return false;
1761 }
1762
1763 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1764
1765 I.eraseFromParent();
1766 return true;
1767 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001768
Tim Northover69271c62016-10-12 22:49:11 +00001769 case TargetOpcode::G_SITOFP:
1770 case TargetOpcode::G_UITOFP:
1771 case TargetOpcode::G_FPTOSI:
1772 case TargetOpcode::G_FPTOUI: {
1773 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1774 SrcTy = MRI.getType(I.getOperand(1).getReg());
1775 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1776 if (NewOpc == Opcode)
1777 return false;
1778
1779 I.setDesc(TII.get(NewOpc));
1780 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1781
1782 return true;
1783 }
1784
1785
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001786 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001787 // The importer is currently unable to import pointer types since they
1788 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001789 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001790
Daniel Sandersedd07842017-08-17 09:26:14 +00001791 case TargetOpcode::G_BITCAST:
1792 // Imported SelectionDAG rules can handle every bitcast except those that
1793 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00001794 // but we might not run an optimizer that deletes them. The other exception
1795 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
1796 // of them.
1797 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00001798
Tim Northover9ac0eba2016-11-08 00:45:29 +00001799 case TargetOpcode::G_SELECT: {
1800 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001801 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1802 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001803 return false;
1804 }
1805
1806 const unsigned CondReg = I.getOperand(1).getReg();
1807 const unsigned TReg = I.getOperand(2).getReg();
1808 const unsigned FReg = I.getOperand(3).getReg();
1809
Jessica Paquette910630c2019-05-03 22:37:46 +00001810 // If we have a floating-point result, then we should use a floating point
1811 // select instead of an integer select.
1812 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
1813 AArch64::GPRRegBankID);
Tim Northover9ac0eba2016-11-08 00:45:29 +00001814
Amara Emersonc37ff0d2019-06-05 23:46:16 +00001815 if (IsFP && tryOptSelect(I))
1816 return true;
Tim Northover9ac0eba2016-11-08 00:45:29 +00001817
Amara Emersonc37ff0d2019-06-05 23:46:16 +00001818 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
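    // Test bit 0 of the condition register (ANDS with immediate 1) so that
    // NZCV reflects the i1 condition, then CSEL on NE picks the true value
    // when the bit is set.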
Tim Northover9ac0eba2016-11-08 00:45:29 +00001819 MachineInstr &TstMI =
1820 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1821 .addDef(AArch64::WZR)
1822 .addUse(CondReg)
1823 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1824
1825 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1826 .addDef(I.getOperand(0).getReg())
1827 .addUse(TReg)
1828 .addUse(FReg)
1829 .addImm(AArch64CC::NE);
1830
1831 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1832 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1833
1834 I.eraseFromParent();
1835 return true;
1836 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001837 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00001838 if (Ty.isVector())
1839 return selectVectorICmp(I, MRI);
1840
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001841 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001842 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1843 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001844 return false;
1845 }
1846
1847 unsigned CmpOpc = 0;
1848 unsigned ZReg = 0;
1849
1850 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1851 if (CmpTy == LLT::scalar(32)) {
1852 CmpOpc = AArch64::SUBSWrr;
1853 ZReg = AArch64::WZR;
1854 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1855 CmpOpc = AArch64::SUBSXrr;
1856 ZReg = AArch64::XZR;
1857 } else {
1858 return false;
1859 }
1860
Kristof Beyls22524402017-01-05 10:16:08 +00001861 // CSINC increments the result by one when the condition code is false.
1862 // Therefore, we have to invert the predicate to get an increment by 1 when
1863 // the predicate is true.
1864 const AArch64CC::CondCode invCC =
1865 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1866 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
Tim Northover6c02ad52016-10-12 22:49:04 +00001867
1868 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1869 .addDef(ZReg)
1870 .addUse(I.getOperand(2).getReg())
1871 .addUse(I.getOperand(3).getReg());
1872
1873 MachineInstr &CSetMI =
1874 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1875 .addDef(I.getOperand(0).getReg())
1876 .addUse(AArch64::WZR)
1877 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001878 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001879
1880 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1881 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1882
1883 I.eraseFromParent();
1884 return true;
1885 }
1886
Tim Northover7dd378d2016-10-12 22:49:07 +00001887 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001888 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001889 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1890 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001891 return false;
1892 }
1893
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001894 unsigned CmpOpc = selectFCMPOpc(I, MRI);
1895 if (!CmpOpc)
Tim Northover7dd378d2016-10-12 22:49:07 +00001896 return false;
Tim Northover7dd378d2016-10-12 22:49:07 +00001897
1898 // FIXME: regbank
1899
1900 AArch64CC::CondCode CC1, CC2;
1901 changeFCMPPredToAArch64CC(
1902 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1903
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001904 // Partially build the compare. Decide if we need to add a use for the
                           1905     // third operand based on whether or not we're comparing against 0.0.
1906 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1907 .addUse(I.getOperand(2).getReg());
1908
1909 // If we don't have an immediate compare, then we need to add a use of the
1910 // register which wasn't used for the immediate.
1911 // Note that the immediate will always be the last operand.
1912 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
1913 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
Tim Northover7dd378d2016-10-12 22:49:07 +00001914
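    // Some FP predicates (e.g. ONE, UEQ) map to two AArch64 condition codes;
    // in that case materialize each with a CSINC and OR the results together.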
1915 const unsigned DefReg = I.getOperand(0).getReg();
1916 unsigned Def1Reg = DefReg;
1917 if (CC2 != AArch64CC::AL)
1918 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1919
1920 MachineInstr &CSetMI =
1921 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1922 .addDef(Def1Reg)
1923 .addUse(AArch64::WZR)
1924 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001925 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001926
1927 if (CC2 != AArch64CC::AL) {
1928 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1929 MachineInstr &CSet2MI =
1930 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1931 .addDef(Def2Reg)
1932 .addUse(AArch64::WZR)
1933 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001934 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001935 MachineInstr &OrMI =
1936 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1937 .addDef(DefReg)
1938 .addUse(Def1Reg)
1939 .addUse(Def2Reg);
1940 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1941 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1942 }
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00001943 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover7dd378d2016-10-12 22:49:07 +00001944 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1945
1946 I.eraseFromParent();
1947 return true;
1948 }
Tim Northovere9600d82017-02-08 17:57:27 +00001949 case TargetOpcode::G_VASTART:
1950 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1951 : selectVaStartAAPCS(I, MF, MRI);
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00001952 case TargetOpcode::G_INTRINSIC:
1953 return selectIntrinsic(I, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001954 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001955 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001956 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001957 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001958 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1959 const unsigned DstReg = I.getOperand(0).getReg();
1960 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1961 const TargetRegisterClass *DstRC =
1962 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1963 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001964 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001965 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001966 case TargetOpcode::G_BLOCK_ADDR: {
1967 if (TM.getCodeModel() == CodeModel::Large) {
1968 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1969 I.eraseFromParent();
1970 return true;
1971 } else {
1972 I.setDesc(TII.get(AArch64::MOVaddrBA));
1973 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1974 I.getOperand(0).getReg())
1975 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1976 /* Offset */ 0, AArch64II::MO_PAGE)
1977 .addBlockAddress(
1978 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1979 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1980 I.eraseFromParent();
1981 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1982 }
1983 }
Jessica Paquette991cb392019-04-23 20:46:19 +00001984 case TargetOpcode::G_INTRINSIC_TRUNC:
1985 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00001986 case TargetOpcode::G_INTRINSIC_ROUND:
1987 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00001988 case TargetOpcode::G_BUILD_VECTOR:
1989 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001990 case TargetOpcode::G_MERGE_VALUES:
1991 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001992 case TargetOpcode::G_UNMERGE_VALUES:
1993 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001994 case TargetOpcode::G_SHUFFLE_VECTOR:
1995 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001996 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1997 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001998 case TargetOpcode::G_INSERT_VECTOR_ELT:
1999 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00002000 case TargetOpcode::G_CONCAT_VECTORS:
2001 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002002 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002003
2004 return false;
2005}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002006
Jessica Paquette991cb392019-04-23 20:46:19 +00002007bool AArch64InstructionSelector::selectIntrinsicTrunc(
2008 MachineInstr &I, MachineRegisterInfo &MRI) const {
2009 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2010
2011 // Select the correct opcode.
2012 unsigned Opc = 0;
2013 if (!SrcTy.isVector()) {
2014 switch (SrcTy.getSizeInBits()) {
2015 default:
2016 case 16:
2017 Opc = AArch64::FRINTZHr;
2018 break;
2019 case 32:
2020 Opc = AArch64::FRINTZSr;
2021 break;
2022 case 64:
2023 Opc = AArch64::FRINTZDr;
2024 break;
2025 }
2026 } else {
2027 unsigned NumElts = SrcTy.getNumElements();
2028 switch (SrcTy.getElementType().getSizeInBits()) {
2029 default:
2030 break;
2031 case 16:
2032 if (NumElts == 4)
2033 Opc = AArch64::FRINTZv4f16;
2034 else if (NumElts == 8)
2035 Opc = AArch64::FRINTZv8f16;
2036 break;
2037 case 32:
2038 if (NumElts == 2)
2039 Opc = AArch64::FRINTZv2f32;
2040 else if (NumElts == 4)
2041 Opc = AArch64::FRINTZv4f32;
2042 break;
2043 case 64:
2044 if (NumElts == 2)
2045 Opc = AArch64::FRINTZv2f64;
2046 break;
2047 }
2048 }
2049
2050 if (!Opc) {
2051 // Didn't get an opcode above, bail.
2052 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2053 return false;
2054 }
2055
2056 // Legalization would have set us up perfectly for this; we just need to
2057 // set the opcode and move on.
2058 I.setDesc(TII.get(Opc));
2059 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2060}
2061
Jessica Paquette4fe75742019-04-23 23:03:03 +00002062bool AArch64InstructionSelector::selectIntrinsicRound(
2063 MachineInstr &I, MachineRegisterInfo &MRI) const {
2064 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2065
2066 // Select the correct opcode.
2067 unsigned Opc = 0;
2068 if (!SrcTy.isVector()) {
2069 switch (SrcTy.getSizeInBits()) {
2070 default:
2071 case 16:
2072 Opc = AArch64::FRINTAHr;
2073 break;
2074 case 32:
2075 Opc = AArch64::FRINTASr;
2076 break;
2077 case 64:
2078 Opc = AArch64::FRINTADr;
2079 break;
2080 }
2081 } else {
2082 unsigned NumElts = SrcTy.getNumElements();
2083 switch (SrcTy.getElementType().getSizeInBits()) {
2084 default:
2085 break;
2086 case 16:
2087 if (NumElts == 4)
2088 Opc = AArch64::FRINTAv4f16;
2089 else if (NumElts == 8)
2090 Opc = AArch64::FRINTAv8f16;
2091 break;
2092 case 32:
2093 if (NumElts == 2)
2094 Opc = AArch64::FRINTAv2f32;
2095 else if (NumElts == 4)
2096 Opc = AArch64::FRINTAv4f32;
2097 break;
2098 case 64:
2099 if (NumElts == 2)
2100 Opc = AArch64::FRINTAv2f64;
2101 break;
2102 }
2103 }
2104
2105 if (!Opc) {
2106 // Didn't get an opcode above, bail.
2107 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2108 return false;
2109 }
2110
2111 // Legalization would have set us up perfectly for this; we just need to
2112 // set the opcode and move on.
2113 I.setDesc(TII.get(Opc));
2114 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2115}
2116
Amara Emerson9bf092d2019-04-09 21:22:43 +00002117bool AArch64InstructionSelector::selectVectorICmp(
2118 MachineInstr &I, MachineRegisterInfo &MRI) const {
2119 unsigned DstReg = I.getOperand(0).getReg();
2120 LLT DstTy = MRI.getType(DstReg);
2121 unsigned SrcReg = I.getOperand(2).getReg();
2122 unsigned Src2Reg = I.getOperand(3).getReg();
2123 LLT SrcTy = MRI.getType(SrcReg);
2124
2125 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2126 unsigned NumElts = DstTy.getNumElements();
2127
2128 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2129 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2130 // Third index is cc opcode:
2131 // 0 == eq
2132 // 1 == ugt
2133 // 2 == uge
2134 // 3 == ult
2135 // 4 == ule
2136 // 5 == sgt
2137 // 6 == sge
2138 // 7 == slt
2139 // 8 == sle
2140 // ne is done by negating 'eq' result.
2141
                           2142   // The table below assumes that for some comparisons the operands will be
2143 // commuted.
2144 // ult op == commute + ugt op
2145 // ule op == commute + uge op
2146 // slt op == commute + sgt op
2147 // sle op == commute + sge op
2148 unsigned PredIdx = 0;
2149 bool SwapOperands = false;
2150 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2151 switch (Pred) {
2152 case CmpInst::ICMP_NE:
2153 case CmpInst::ICMP_EQ:
2154 PredIdx = 0;
2155 break;
2156 case CmpInst::ICMP_UGT:
2157 PredIdx = 1;
2158 break;
2159 case CmpInst::ICMP_UGE:
2160 PredIdx = 2;
2161 break;
2162 case CmpInst::ICMP_ULT:
2163 PredIdx = 3;
2164 SwapOperands = true;
2165 break;
2166 case CmpInst::ICMP_ULE:
2167 PredIdx = 4;
2168 SwapOperands = true;
2169 break;
2170 case CmpInst::ICMP_SGT:
2171 PredIdx = 5;
2172 break;
2173 case CmpInst::ICMP_SGE:
2174 PredIdx = 6;
2175 break;
2176 case CmpInst::ICMP_SLT:
2177 PredIdx = 7;
2178 SwapOperands = true;
2179 break;
2180 case CmpInst::ICMP_SLE:
2181 PredIdx = 8;
2182 SwapOperands = true;
2183 break;
2184 default:
2185 llvm_unreachable("Unhandled icmp predicate");
2186 return false;
2187 }
2188
2189 // This table obviously should be tablegen'd when we have our GISel native
2190 // tablegen selector.
2191
2192 static const unsigned OpcTable[4][4][9] = {
2193 {
2194 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2195 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2196 0 /* invalid */},
2197 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2198 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2199 0 /* invalid */},
2200 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2201 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2202 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2203 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2204 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2205 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2206 },
2207 {
2208 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2209 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2210 0 /* invalid */},
2211 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2212 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2213 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2214 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2215 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2216 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2217 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2218 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2219 0 /* invalid */}
2220 },
2221 {
2222 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2223 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2224 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2225 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2226 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2227 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2228 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2229 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2230 0 /* invalid */},
2231 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2232 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2233 0 /* invalid */}
2234 },
2235 {
2236 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2237 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2238 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2239 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2240 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2241 0 /* invalid */},
2242 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2243 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2244 0 /* invalid */},
2245 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2246 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2247 0 /* invalid */}
2248 },
2249 };
2250 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2251 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2252 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2253 if (!Opc) {
2254 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2255 return false;
2256 }
2257
2258 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2259 const TargetRegisterClass *SrcRC =
2260 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2261 if (!SrcRC) {
2262 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2263 return false;
2264 }
2265
2266 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2267 if (SrcTy.getSizeInBits() == 128)
2268 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2269
2270 if (SwapOperands)
2271 std::swap(SrcReg, Src2Reg);
2272
2273 MachineIRBuilder MIB(I);
2274 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2275 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2276
2277 // Invert if we had a 'ne' cc.
2278 if (NotOpc) {
2279 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2280 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2281 } else {
2282 MIB.buildCopy(DstReg, Cmp.getReg(0));
2283 }
2284 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2285 I.eraseFromParent();
2286 return true;
2287}
2288
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002289MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00002290 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002291 MachineIRBuilder &MIRBuilder) const {
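  // Place the scalar into lane 0 of an otherwise undefined vector register:
  // build an IMPLICIT_DEF of the destination class and INSERT_SUBREG the
  // scalar into the appropriate low subregister.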
2292 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002293
2294 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002295 auto Ins =
2296 MIRBuilder
2297 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2298 .addImm(SubregIndex);
2299 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2300 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2301 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002302 };
2303
Amara Emerson8acb0d92019-03-04 19:16:00 +00002304 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002305 case 16:
2306 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002307 case 32:
2308 return BuildFn(AArch64::ssub);
2309 case 64:
2310 return BuildFn(AArch64::dsub);
2311 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002312 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002313 }
2314}
2315
Amara Emerson8cb186c2018-12-20 01:11:04 +00002316bool AArch64InstructionSelector::selectMergeValues(
2317 MachineInstr &I, MachineRegisterInfo &MRI) const {
2318 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2319 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2320 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2321 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
2322
2323 // At the moment we only support merging two s32s into an s64.
2324 if (I.getNumOperands() != 3)
2325 return false;
2326 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2327 return false;
2328 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
2329 if (RB.getID() != AArch64::GPRRegBankID)
2330 return false;
2331
2332 auto *DstRC = &AArch64::GPR64RegClass;
2333 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
2334 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2335 TII.get(TargetOpcode::SUBREG_TO_REG))
2336 .addDef(SubToRegDef)
2337 .addImm(0)
2338 .addUse(I.getOperand(1).getReg())
2339 .addImm(AArch64::sub_32);
2340 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
                           2341   // Need to anyext the second scalar before we can use BFM.
2342 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2343 TII.get(TargetOpcode::SUBREG_TO_REG))
2344 .addDef(SubToRegDef2)
2345 .addImm(0)
2346 .addUse(I.getOperand(2).getReg())
2347 .addImm(AArch64::sub_32);
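  // BFM with immr = 32, imms = 31 acts as BFI #32, #32: it inserts the low 32
  // bits of the second source into bits [63:32] of the first, producing the
  // merged 64-bit value.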
Amara Emerson8cb186c2018-12-20 01:11:04 +00002348 MachineInstr &BFM =
2349 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002350 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002351 .addUse(SubToRegDef)
2352 .addUse(SubToRegDef2)
2353 .addImm(32)
2354 .addImm(31);
2355 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2356 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2357 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2358 I.eraseFromParent();
2359 return true;
2360}
2361
Jessica Paquette607774c2019-03-11 22:18:01 +00002362static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2363 const unsigned EltSize) {
2364 // Choose a lane copy opcode and subregister based off of the size of the
2365 // vector's elements.
2366 switch (EltSize) {
2367 case 16:
2368 CopyOpc = AArch64::CPYi16;
2369 ExtractSubReg = AArch64::hsub;
2370 break;
2371 case 32:
2372 CopyOpc = AArch64::CPYi32;
2373 ExtractSubReg = AArch64::ssub;
2374 break;
2375 case 64:
2376 CopyOpc = AArch64::CPYi64;
2377 ExtractSubReg = AArch64::dsub;
2378 break;
2379 default:
2380 // Unknown size, bail out.
2381 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2382 return false;
2383 }
2384 return true;
2385}
2386
Amara Emersond61b89b2019-03-14 22:48:18 +00002387MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
2388 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2389 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
2390 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2391 unsigned CopyOpc = 0;
2392 unsigned ExtractSubReg = 0;
2393 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2394 LLVM_DEBUG(
2395 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2396 return nullptr;
2397 }
2398
2399 const TargetRegisterClass *DstRC =
2400 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2401 if (!DstRC) {
2402 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2403 return nullptr;
2404 }
2405
2406 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2407 const LLT &VecTy = MRI.getType(VecReg);
2408 const TargetRegisterClass *VecRC =
2409 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2410 if (!VecRC) {
2411 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2412 return nullptr;
2413 }
2414
2415 // The register that we're going to copy into.
2416 unsigned InsertReg = VecReg;
2417 if (!DstReg)
2418 DstReg = MRI.createVirtualRegister(DstRC);
2419 // If the lane index is 0, we just use a subregister COPY.
2420 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002421 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2422 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002423 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002424 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002425 }
2426
2427 // Lane copies require 128-bit wide registers. If we're dealing with an
2428 // unpacked vector, then we need to move up to that width. Insert an implicit
2429 // def and a subregister insert to get us there.
2430 if (VecTy.getSizeInBits() != 128) {
2431 MachineInstr *ScalarToVector = emitScalarToVector(
2432 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2433 if (!ScalarToVector)
2434 return nullptr;
2435 InsertReg = ScalarToVector->getOperand(0).getReg();
2436 }
2437
2438 MachineInstr *LaneCopyMI =
2439 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2440 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2441
2442 // Make sure that we actually constrain the initial copy.
2443 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2444 return LaneCopyMI;
2445}
2446
Jessica Paquette607774c2019-03-11 22:18:01 +00002447bool AArch64InstructionSelector::selectExtractElt(
2448 MachineInstr &I, MachineRegisterInfo &MRI) const {
2449 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2450 "unexpected opcode!");
2451 unsigned DstReg = I.getOperand(0).getReg();
2452 const LLT NarrowTy = MRI.getType(DstReg);
2453 const unsigned SrcReg = I.getOperand(1).getReg();
2454 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002455 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002456 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2457 "source register size too small!");
2458 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2459
2460 // Need the lane index to determine the correct copy opcode.
2461 MachineOperand &LaneIdxOp = I.getOperand(2);
2462 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2463
2464 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2465 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2466 return false;
2467 }
2468
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002469 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002470 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2471 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002472 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002473 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002474
Jessica Paquette607774c2019-03-11 22:18:01 +00002475 MachineIRBuilder MIRBuilder(I);
2476
Amara Emersond61b89b2019-03-14 22:48:18 +00002477 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2478 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2479 LaneIdx, MIRBuilder);
2480 if (!Extract)
2481 return false;
2482
2483 I.eraseFromParent();
2484 return true;
2485}
2486
2487bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2488 MachineInstr &I, MachineRegisterInfo &MRI) const {
2489 unsigned NumElts = I.getNumOperands() - 1;
2490 unsigned SrcReg = I.getOperand(NumElts).getReg();
2491 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2492 const LLT SrcTy = MRI.getType(SrcReg);
2493
2494 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2495 if (SrcTy.getSizeInBits() > 128) {
2496 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2497 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002498 }
2499
Amara Emersond61b89b2019-03-14 22:48:18 +00002500 MachineIRBuilder MIB(I);
2501
2502 // We implement a split vector operation by treating the sub-vectors as
2503 // scalars and extracting them.
2504 const RegisterBank &DstRB =
2505 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2506 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2507 unsigned Dst = I.getOperand(OpIdx).getReg();
2508 MachineInstr *Extract =
2509 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2510 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002511 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002512 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002513 I.eraseFromParent();
2514 return true;
2515}
2516
Jessica Paquette245047d2019-01-24 22:00:41 +00002517bool AArch64InstructionSelector::selectUnmergeValues(
2518 MachineInstr &I, MachineRegisterInfo &MRI) const {
2519 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2520 "unexpected opcode");
2521
2522 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2523 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2524 AArch64::FPRRegBankID ||
2525 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2526 AArch64::FPRRegBankID) {
2527 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2528 "currently unsupported.\n");
2529 return false;
2530 }
2531
2532 // The last operand is the vector source register, and every other operand is
2533 // a register to unpack into.
2534 unsigned NumElts = I.getNumOperands() - 1;
2535 unsigned SrcReg = I.getOperand(NumElts).getReg();
2536 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2537 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002538 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002539 assert(WideTy.isVector() && "can only unmerge from vector types!");
2540 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2541 "source register size too small!");
2542
Amara Emersond61b89b2019-03-14 22:48:18 +00002543 if (!NarrowTy.isScalar())
2544 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002545
Amara Emerson3739a202019-03-15 21:59:50 +00002546 MachineIRBuilder MIB(I);
2547
Jessica Paquette245047d2019-01-24 22:00:41 +00002548 // Choose a lane copy opcode and subregister based off of the size of the
2549 // vector's elements.
2550 unsigned CopyOpc = 0;
2551 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002552 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002553 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002554
2555 // Set up for the lane copies.
2556 MachineBasicBlock &MBB = *I.getParent();
2557
2558 // Stores the registers we'll be copying from.
2559 SmallVector<unsigned, 4> InsertRegs;
2560
2561 // We'll use the first register twice, so we only need NumElts-1 registers.
2562 unsigned NumInsertRegs = NumElts - 1;
2563
2564 // If our elements fit into exactly 128 bits, then we can copy from the source
2565 // directly. Otherwise, we need to do a bit of setup with some subregister
2566 // inserts.
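  // E.g. (sketch): unmerging a <2 x s64> into two s64 values can copy straight
  // out of the 128-bit source, while unmerging a <2 x s32> (64 bits total) is
  // assumed to need an INSERT_SUBREG of the source into an FPR128 first, since
  // the lane copy opcodes operate on 128-bit registers.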
2567 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2568 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2569 } else {
2570 // No. We have to perform subregister inserts. For each insert, create an
2571 // implicit def and a subregister insert, and save the register we create.
2572 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2573 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2574 MachineInstr &ImpDefMI =
2575 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2576 ImpDefReg);
2577
2578 // Now, create the subregister insert from SrcReg.
2579 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2580 MachineInstr &InsMI =
2581 *BuildMI(MBB, I, I.getDebugLoc(),
2582 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2583 .addUse(ImpDefReg)
2584 .addUse(SrcReg)
2585 .addImm(AArch64::dsub);
2586
2587 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2588 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2589
2590 // Save the register so that we can copy from it after.
2591 InsertRegs.push_back(InsertReg);
2592 }
2593 }
2594
2595 // Now that we've created any necessary subregister inserts, we can
2596 // create the copies.
2597 //
2598 // Perform the first copy separately as a subregister copy.
2599 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002600 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2601 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002602 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002603
2604 // Now, perform the remaining copies as vector lane copies.
2605 unsigned LaneIdx = 1;
2606 for (unsigned InsReg : InsertRegs) {
2607 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2608 MachineInstr &CopyInst =
2609 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2610 .addUse(InsReg)
2611 .addImm(LaneIdx);
2612 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2613 ++LaneIdx;
2614 }
2615
2616 // Separately constrain the first copy's destination. Because of the
2617 // limitation in constrainOperandRegClass, we can't guarantee that this will
2618 // actually be constrained. So, do it ourselves using the second operand.
2619 const TargetRegisterClass *RC =
2620 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2621 if (!RC) {
2622 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2623 return false;
2624 }
2625
2626 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2627 I.eraseFromParent();
2628 return true;
2629}
2630
Amara Emerson2ff22982019-03-14 22:48:15 +00002631bool AArch64InstructionSelector::selectConcatVectors(
2632 MachineInstr &I, MachineRegisterInfo &MRI) const {
2633 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2634 "Unexpected opcode");
2635 unsigned Dst = I.getOperand(0).getReg();
2636 unsigned Op1 = I.getOperand(1).getReg();
2637 unsigned Op2 = I.getOperand(2).getReg();
2638 MachineIRBuilder MIRBuilder(I);
2639 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2640 if (!ConcatMI)
2641 return false;
2642 I.eraseFromParent();
2643 return true;
2644}
2645
Amara Emerson1abe05c2019-02-21 20:20:16 +00002646void AArch64InstructionSelector::collectShuffleMaskIndices(
2647 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00002648 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00002649 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2650 assert(
2651 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2652 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2653 // Find the constant indices.
2654 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2655 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2656 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2657 // Look through copies.
2658 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2659 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2660 assert(ScalarDef && "Could not find def of copy operand");
2661 }
Amara Emerson2806fd02019-04-12 21:31:21 +00002662 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2663 // This must be an undef if it is not a constant.
2664 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2665 Idxs.push_back(None);
2666 } else {
2667 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2668 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002669 }
2670}
2671
2672unsigned
2673AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2674 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00002675 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00002676 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2677 if (Align == 0)
2678 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2679
2680 MachineConstantPool *MCP = MF.getConstantPool();
2681 return MCP->getConstantPoolIndex(CPVal, Align);
2682}
2683
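/// Emit a load of \p CPVal from the constant pool: an ADRP of the entry's page
/// followed by a page-offset load (LDRQui for 16-byte values, LDRDui for
/// 8-byte values). Returns the load instruction, or nullptr for unsupported
/// sizes.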
2684MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2685 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2686 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2687
2688 auto Adrp =
2689 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2690 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002691
2692 MachineInstr *LoadMI = nullptr;
2693 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2694 case 16:
2695 LoadMI =
2696 &*MIRBuilder
2697 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2698 .addConstantPoolIndex(CPIdx, 0,
2699 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2700 break;
2701 case 8:
2702 LoadMI = &*MIRBuilder
2703 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2704 .addConstantPoolIndex(
2705 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2706 break;
2707 default:
2708 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2709 << *CPVal->getType());
2710 return nullptr;
2711 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002712 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002713 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2714 return LoadMI;
2715}
2716
2717/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2718/// size and RB.
2719static std::pair<unsigned, unsigned>
2720getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2721 unsigned Opc, SubregIdx;
2722 if (RB.getID() == AArch64::GPRRegBankID) {
2723 if (EltSize == 32) {
2724 Opc = AArch64::INSvi32gpr;
2725 SubregIdx = AArch64::ssub;
2726 } else if (EltSize == 64) {
2727 Opc = AArch64::INSvi64gpr;
2728 SubregIdx = AArch64::dsub;
2729 } else {
2730 llvm_unreachable("invalid elt size!");
2731 }
2732 } else {
2733 if (EltSize == 8) {
2734 Opc = AArch64::INSvi8lane;
2735 SubregIdx = AArch64::bsub;
2736 } else if (EltSize == 16) {
2737 Opc = AArch64::INSvi16lane;
2738 SubregIdx = AArch64::hsub;
2739 } else if (EltSize == 32) {
2740 Opc = AArch64::INSvi32lane;
2741 SubregIdx = AArch64::ssub;
2742 } else if (EltSize == 64) {
2743 Opc = AArch64::INSvi64lane;
2744 SubregIdx = AArch64::dsub;
2745 } else {
2746 llvm_unreachable("invalid elt size!");
2747 }
2748 }
2749 return std::make_pair(Opc, SubregIdx);
2750}
2751
2752MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002753 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2754 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002755 // We implement a vector concat by:
2756 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2757 // 2. Insert the upper vector into the destination's upper element
2758 // TODO: some of this code is common with G_BUILD_VECTOR handling.
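  // For example (an illustrative sketch), concatenating two <2 x s32> operands
  // widens each 64-bit operand to a 128-bit register via emitScalarToVector,
  // then inserts the widened second operand into lane 1 of the widened first,
  // leaving Op1 in the low 64 bits and Op2 in the high 64 bits of the result.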
2759 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2760
2761 const LLT Op1Ty = MRI.getType(Op1);
2762 const LLT Op2Ty = MRI.getType(Op2);
2763
2764 if (Op1Ty != Op2Ty) {
2765 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
2766 return nullptr;
2767 }
2768 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2769
2770 if (Op1Ty.getSizeInBits() >= 128) {
2771 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
2772 return nullptr;
2773 }
2774
2775 // At the moment we just support 64 bit vector concats.
2776 if (Op1Ty.getSizeInBits() != 64) {
2777 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
2778 return nullptr;
2779 }
2780
2781 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2782 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2783 const TargetRegisterClass *DstRC =
2784 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2785
2786 MachineInstr *WidenedOp1 =
2787 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2788 MachineInstr *WidenedOp2 =
2789 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2790 if (!WidenedOp1 || !WidenedOp2) {
2791 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
2792 return nullptr;
2793 }
2794
2795 // Now do the insert of the upper element.
2796 unsigned InsertOpc, InsSubRegIdx;
2797 std::tie(InsertOpc, InsSubRegIdx) =
2798 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2799
Amara Emerson2ff22982019-03-14 22:48:15 +00002800 if (!Dst)
2801 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002802 auto InsElt =
2803 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002804 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002805 .addImm(1) /* Lane index */
2806 .addUse(WidenedOp2->getOperand(0).getReg())
2807 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002808 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2809 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002810}
2811
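/// Try to rewrite \p I, a G_FCONSTANT, in place into an FMOVSi/FMOVDi using
/// the 8-bit FMOV immediate encoding. Only 32- and 64-bit defs are handled;
/// returns nullptr if the value is zero or cannot be encoded as an FMOV
/// immediate.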
Jessica Paquettea3843fe2019-05-01 22:39:43 +00002812MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
2813 MachineInstr &I, MachineRegisterInfo &MRI) const {
2814 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
2815 "Expected a G_FCONSTANT!");
2816 MachineOperand &ImmOp = I.getOperand(1);
2817 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
2818
2819 // Only handle 32 and 64 bit defs for now.
2820 if (DefSize != 32 && DefSize != 64)
2821 return nullptr;
2822
2823 // Don't handle null values using FMOV.
2824 if (ImmOp.getFPImm()->isNullValue())
2825 return nullptr;
2826
2827 // Get the immediate representation for the FMOV.
2828 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
2829 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
2830 : AArch64_AM::getFP64Imm(ImmValAPF);
2831
2832 // If this is -1, it means the immediate can't be represented as the requested
2833 // floating point value. Bail.
2834 if (Imm == -1)
2835 return nullptr;
2836
2837 // Update MI to represent the new FMOV instruction, constrain it, and return.
2838 ImmOp.ChangeToImmediate(Imm);
2839 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
2840 I.setDesc(TII.get(MovOpc));
2841 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2842 return &I;
2843}
2844
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002845bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
2846 MachineIRBuilder MIB(I);
2847 MachineRegisterInfo &MRI = *MIB.getMRI();
2848 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2849
2850 // We want to recognize this pattern:
2851 //
2852 // $z = G_FCMP pred, $x, $y
2853 // ...
2854 // $w = G_SELECT $z, $a, $b
2855 //
2856 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
2857 // some copies/truncs in between).
2858 //
2859 // If we see this, then we can emit something like this:
2860 //
2861 // fcmp $x, $y
2862 // fcsel $w, $a, $b, pred
2863 //
2864 // Rather than emitting both of the rather long sequences in the standard
2865 // G_FCMP/G_SELECT select methods.
2866
2867 // First, check if the condition is defined by a compare.
2868 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
2869 while (CondDef) {
2870 // We can only fold if all of the defs have one use.
2871 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
2872 return false;
2873
2874 // We can skip over G_TRUNC since the condition is 1-bit.
2875 // Truncating/extending can have no impact on the value.
2876 unsigned Opc = CondDef->getOpcode();
2877 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
2878 break;
2879
Amara Emersond940e202019-06-06 07:33:47 +00002880 // Can't see past copies from physregs.
2881 if (Opc == TargetOpcode::COPY &&
2882 TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
2883 return false;
2884
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002885 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
2886 }
2887
2888 // Is the condition defined by a compare?
2889 // TODO: Handle G_ICMP.
2890 if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
2891 return false;
2892
2893 // Get the condition code for the select.
2894 AArch64CC::CondCode CondCode;
2895 AArch64CC::CondCode CondCode2;
2896 changeFCMPPredToAArch64CC(
2897 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
2898 CondCode2);
2899
2900 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
2901 // instructions to emit the comparison.
2902 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
2903 // unnecessary.
2904 if (CondCode2 != AArch64CC::AL)
2905 return false;
2906
2907 // Make sure we'll be able to select the compare.
2908 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
2909 if (!CmpOpc)
2910 return false;
2911
2912 // Emit a new compare.
2913 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
2914 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2915 Cmp.addUse(CondDef->getOperand(3).getReg());
2916
2917 // Emit the select.
2918 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2919 auto CSel =
2920 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
2921 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
2922 .addImm(CondCode);
2923 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2924 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
2925 I.eraseFromParent();
2926 return true;
2927}
2928
Amara Emerson761ca2e2019-03-19 21:43:05 +00002929bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2930 // Try to match a vector splat operation into a dup instruction.
2931 // We're looking for this pattern:
2932 // %scalar:gpr(s64) = COPY $x0
2933 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2934 // %cst0:gpr(s32) = G_CONSTANT i32 0
2935 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2936 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2937 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2938 // %zerovec(<2 x s32>)
2939 //
2940 // ...into:
2941 // %splat = DUP %scalar
2942 // We use the regbank of the scalar to determine which kind of dup to use.
2943 MachineIRBuilder MIB(I);
2944 MachineRegisterInfo &MRI = *MIB.getMRI();
2945 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2946 using namespace TargetOpcode;
2947 using namespace MIPatternMatch;
2948
2949 // Begin matching the insert.
2950 auto *InsMI =
2951 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2952 if (!InsMI)
2953 return false;
2954 // Match the undef vector operand.
2955 auto *UndefMI =
2956 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2957 if (!UndefMI)
2958 return false;
2959 // Match the scalar being splatted.
2960 unsigned ScalarReg = InsMI->getOperand(2).getReg();
2961 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2962 // Match the index constant 0.
2963 int64_t Index = 0;
2964 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2965 return false;
2966
2967 // The shuffle's second operand doesn't matter if the mask is all zero.
2968 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2969 if (!ZeroVec)
2970 return false;
2971 int64_t Zero = 0;
2972 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2973 return false;
2974 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2975 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2976 return false; // This wasn't an all zeros vector.
2977 }
2978
2979 // We're done, now find out what kind of splat we need.
2980 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2981 LLT EltTy = VecTy.getElementType();
2982 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2983 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
2984 return false;
2985 }
2986 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2987 static const unsigned OpcTable[2][2] = {
2988 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2989 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2990 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2991
2992 // For FP splats, we need to widen the scalar reg via undef too.
2993 if (IsFP) {
2994 MachineInstr *Widen = emitScalarToVector(
2995 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2996 if (!Widen)
2997 return false;
2998 ScalarReg = Widen->getOperand(0).getReg();
2999 }
3000 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3001 if (IsFP)
3002 Dup.addImm(0);
3003 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3004 I.eraseFromParent();
3005 return true;
3006}
3007
3008bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3009 if (TM.getOptLevel() == CodeGenOpt::None)
3010 return false;
3011 if (tryOptVectorDup(I))
3012 return true;
3013 return false;
3014}
3015
Amara Emerson1abe05c2019-02-21 20:20:16 +00003016bool AArch64InstructionSelector::selectShuffleVector(
3017 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003018 if (tryOptVectorShuffle(I))
3019 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003020 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3021 unsigned Src1Reg = I.getOperand(1).getReg();
3022 const LLT Src1Ty = MRI.getType(Src1Reg);
3023 unsigned Src2Reg = I.getOperand(2).getReg();
3024 const LLT Src2Ty = MRI.getType(Src2Reg);
3025
3026 MachineBasicBlock &MBB = *I.getParent();
3027 MachineFunction &MF = *MBB.getParent();
3028 LLVMContext &Ctx = MF.getFunction().getContext();
3029
3030 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3031 // operand; it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00003032 // find the mask indices. If the mask element is undef, then
3033 // collectShuffleMaskIndices() will add a None entry for that index into
3034 // the list.
3035 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003036 collectShuffleMaskIndices(I, MRI, Mask);
3037 assert(!Mask.empty() && "Expected to find mask indices");
3038
3039 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3040 // it originated from a <1 x T> type. Those should have been lowered into
3041 // G_BUILD_VECTOR earlier.
3042 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3043 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3044 return false;
3045 }
3046
3047 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3048
3049 SmallVector<Constant *, 64> CstIdxs;
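  // Expand each mask element into per-byte indices for TBL. As an illustrative
  // example, with 4-byte elements (BytesPerElt == 4), a mask value of 1
  // expands to the byte indices 4, 5, 6, 7.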
Amara Emerson2806fd02019-04-12 21:31:21 +00003050 for (auto &MaybeVal : Mask) {
3051 // For now, we'll just assume any undef indexes are 0. This should be
3052 // optimized in the future, e.g. to select DUP etc.
3053 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003054 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3055 unsigned Offset = Byte + Val * BytesPerElt;
3056 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3057 }
3058 }
3059
Amara Emerson8acb0d92019-03-04 19:16:00 +00003060 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003061
3062 // Use a constant pool to load the index vector for TBL.
3063 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003064 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3065 if (!IndexLoad) {
3066 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3067 return false;
3068 }
3069
Amara Emerson8acb0d92019-03-04 19:16:00 +00003070 if (DstTy.getSizeInBits() != 128) {
3071 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3072 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00003073 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003074 if (!Concat) {
3075 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3076 return false;
3077 }
3078
3079 // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
3080 IndexLoad =
3081 emitScalarToVector(64, &AArch64::FPR128RegClass,
3082 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3083
3084 auto TBL1 = MIRBuilder.buildInstr(
3085 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3086 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3087 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3088
Amara Emerson3739a202019-03-15 21:59:50 +00003089 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00003090 MIRBuilder
3091 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3092 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003093 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3094 I.eraseFromParent();
3095 return true;
3096 }
3097
Amara Emerson1abe05c2019-02-21 20:20:16 +00003098 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3099 // Q registers for regalloc.
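  // E.g. (sketch):
  //   %pair:qq = REG_SEQUENCE %src1, %subreg.qsub0, %src2, %subreg.qsub1
  //   %dst = TBLv16i8Two %pair, %index_vector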
3100 auto RegSeq = MIRBuilder
3101 .buildInstr(TargetOpcode::REG_SEQUENCE,
3102 {&AArch64::QQRegClass}, {Src1Reg})
3103 .addImm(AArch64::qsub0)
3104 .addUse(Src2Reg)
3105 .addImm(AArch64::qsub1);
3106
3107 auto TBL2 =
3108 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3109 {RegSeq, IndexLoad->getOperand(0).getReg()});
3110 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3111 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3112 I.eraseFromParent();
3113 return true;
3114}
3115
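/// Emit an instruction that inserts \p EltReg into lane \p LaneIdx of the
/// vector \p SrcReg, defining \p DstReg (a fresh FPR128 virtual register is
/// created if none is provided). The opcode is chosen from the element size
/// and the bank \p RB; FPR elements are first widened with emitScalarToVector
/// so that a lane-to-lane INS can be used.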
Jessica Paquette16d67a32019-03-13 23:22:23 +00003116MachineInstr *AArch64InstructionSelector::emitLaneInsert(
3117 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
3118 unsigned LaneIdx, const RegisterBank &RB,
3119 MachineIRBuilder &MIRBuilder) const {
3120 MachineInstr *InsElt = nullptr;
3121 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3122 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3123
3124 // Create a register to define with the insert if one wasn't passed in.
3125 if (!DstReg)
3126 DstReg = MRI.createVirtualRegister(DstRC);
3127
3128 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3129 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3130
3131 if (RB.getID() == AArch64::FPRRegBankID) {
3132 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3133 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3134 .addImm(LaneIdx)
3135 .addUse(InsSub->getOperand(0).getReg())
3136 .addImm(0);
3137 } else {
3138 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3139 .addImm(LaneIdx)
3140 .addUse(EltReg);
3141 }
3142
3143 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3144 return InsElt;
3145}
3146
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003147bool AArch64InstructionSelector::selectInsertElt(
3148 MachineInstr &I, MachineRegisterInfo &MRI) const {
3149 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3150
3151 // Get information on the destination.
3152 unsigned DstReg = I.getOperand(0).getReg();
3153 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003154 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003155
3156 // Get information on the element we want to insert into the destination.
3157 unsigned EltReg = I.getOperand(2).getReg();
3158 const LLT EltTy = MRI.getType(EltReg);
3159 unsigned EltSize = EltTy.getSizeInBits();
3160 if (EltSize < 16 || EltSize > 64)
3161 return false; // Don't support all element types yet.
3162
3163 // Find the definition of the index. Bail out if it's not defined by a
3164 // G_CONSTANT.
3165 unsigned IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00003166 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3167 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003168 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00003169 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003170
3171 // Perform the lane insert.
3172 unsigned SrcReg = I.getOperand(1).getReg();
3173 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3174 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003175
3176 if (VecSize < 128) {
3177 // If the vector we're inserting into is smaller than 128 bits, widen it
3178 // to 128 to do the insert.
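    // E.g. (sketch): inserting into a <2 x s32> widens the 64-bit source to an
    // FPR128, performs the lane insert on the 128-bit value, and then copies
    // the result back out through the dsub subregister below.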
3179 MachineInstr *ScalarToVec = emitScalarToVector(
3180 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3181 if (!ScalarToVec)
3182 return false;
3183 SrcReg = ScalarToVec->getOperand(0).getReg();
3184 }
3185
3186 // Create an insert into a new FPR128 register.
3187 // Note that if our vector is already 128 bits, we end up emitting an extra
3188 // register.
3189 MachineInstr *InsMI =
3190 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3191
3192 if (VecSize < 128) {
3193 // If we had to widen to perform the insert, then we have to demote back to
3194 // the original size to get the result we want.
3195 unsigned DemoteVec = InsMI->getOperand(0).getReg();
3196 const TargetRegisterClass *RC =
3197 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3198 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3199 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3200 return false;
3201 }
3202 unsigned SubReg = 0;
3203 if (!getSubRegForClass(RC, TRI, SubReg))
3204 return false;
3205 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3206 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3207 << ")\n");
3208 return false;
3209 }
3210 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3211 .addReg(DemoteVec, 0, SubReg);
3212 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3213 } else {
3214 // No widening needed.
3215 InsMI->getOperand(0).setReg(DstReg);
3216 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3217 }
3218
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003219 I.eraseFromParent();
3220 return true;
3221}
3222
Amara Emerson5ec14602018-12-10 18:44:58 +00003223bool AArch64InstructionSelector::selectBuildVector(
3224 MachineInstr &I, MachineRegisterInfo &MRI) const {
3225 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3226 // Until we port more of the optimized selections, for now just use a vector
3227 // insert sequence.
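  // As a rough sketch, a G_BUILD_VECTOR of N scalars becomes one
  // scalar-to-vector insert for operand 1 followed by N-1 lane inserts (see
  // emitLaneInsert above), plus a final subregister copy if the destination is
  // narrower than 128 bits.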
3228 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3229 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3230 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003231 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003232 return false; // Don't support all element types yet.
3233 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003234 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003235
3236 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003237 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003238 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3239 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003240 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003241 return false;
3242
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003243 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003244 unsigned DstSize = DstTy.getSizeInBits();
3245
3246 // Keep track of the last MI we inserted. Later on, we might be able to save
3247 // a copy using it.
3248 MachineInstr *PrevMI = nullptr;
3249 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003250 // Note that if we don't do a subregister copy, we can end up making an
3251 // extra register.
3252 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3253 MIRBuilder);
3254 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003255 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003256
3257 // If DstTy's size in bits is less than 128, then emit a subregister copy
3258 // from DstVec to the last register we've defined.
3259 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003260 // Force this to be FPR using the destination vector.
3261 const TargetRegisterClass *RC =
3262 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003263 if (!RC)
3264 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003265 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3266 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3267 return false;
3268 }
3269
3270 unsigned SubReg = 0;
3271 if (!getSubRegForClass(RC, TRI, SubReg))
3272 return false;
3273 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3274 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3275 << ")\n");
3276 return false;
3277 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003278
3279 unsigned Reg = MRI.createVirtualRegister(RC);
3280 unsigned DstReg = I.getOperand(0).getReg();
3281
Amara Emerson86271782019-03-18 19:20:10 +00003282 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3283 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003284 MachineOperand &RegOp = I.getOperand(1);
3285 RegOp.setReg(Reg);
3286 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3287 } else {
3288 // We don't need a subregister copy. Save a copy by re-using the
3289 // destination register on the final insert.
3290 assert(PrevMI && "PrevMI was null?");
3291 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3292 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3293 }
3294
Amara Emerson5ec14602018-12-10 18:44:58 +00003295 I.eraseFromParent();
3296 return true;
3297}
3298
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003299/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3300/// ID if it exists, and 0 otherwise.
3301static unsigned findIntrinsicID(MachineInstr &I) {
3302 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3303 return Op.isIntrinsicID();
3304 });
3305 if (IntrinOp == I.operands_end())
3306 return 0;
3307 return IntrinOp->getIntrinsicID();
3308}
3309
Jessica Paquette22c62152019-04-02 19:57:26 +00003310/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3311/// intrinsic.
3312static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3313 switch (NumBytesToStore) {
3314 // TODO: 1, 2, and 4 byte stores.
3315 case 8:
3316 return AArch64::STLXRX;
3317 default:
3318 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3319 << NumBytesToStore << ")\n");
3320 break;
3321 }
3322 return 0;
3323}
3324
3325bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3326 MachineInstr &I, MachineRegisterInfo &MRI) const {
3327 // Find the intrinsic ID.
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003328 unsigned IntrinID = findIntrinsicID(I);
3329 if (!IntrinID)
Jessica Paquette22c62152019-04-02 19:57:26 +00003330 return false;
Jessica Paquette22c62152019-04-02 19:57:26 +00003331 MachineIRBuilder MIRBuilder(I);
3332
3333 // Select the instruction.
3334 switch (IntrinID) {
3335 default:
3336 return false;
3337 case Intrinsic::trap:
3338 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3339 break;
3340 case Intrinsic::aarch64_stlxr:
3341 unsigned StatReg = I.getOperand(0).getReg();
3342 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3343 "Status register must be 32 bits!");
3344 unsigned SrcReg = I.getOperand(2).getReg();
3345
3346 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3347 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3348 return false;
3349 }
3350
3351 unsigned PtrReg = I.getOperand(3).getReg();
3352 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3353
3354 // Expect only one memory operand.
3355 if (!I.hasOneMemOperand())
3356 return false;
3357
3358 const MachineMemOperand *MemOp = *I.memoperands_begin();
3359 unsigned NumBytesToStore = MemOp->getSize();
3360 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3361 if (!Opc)
3362 return false;
3363
3364 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
3365 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
3366 }
3367
3368 I.eraseFromParent();
3369 return true;
3370}
3371
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003372bool AArch64InstructionSelector::selectIntrinsic(
3373 MachineInstr &I, MachineRegisterInfo &MRI) const {
3374 unsigned IntrinID = findIntrinsicID(I);
3375 if (!IntrinID)
3376 return false;
3377 MachineIRBuilder MIRBuilder(I);
3378
3379 switch (IntrinID) {
3380 default:
3381 break;
3382 case Intrinsic::aarch64_crypto_sha1h:
3383 unsigned DstReg = I.getOperand(0).getReg();
3384 unsigned SrcReg = I.getOperand(2).getReg();
3385
3386 // FIXME: Should this be an assert?
3387 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
3388 MRI.getType(SrcReg).getSizeInBits() != 32)
3389 return false;
3390
3391 // The operation has to happen on FPRs. Set up some new FPR registers for
3392 // the source and destination if they are on GPRs.
3393 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3394 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3395 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
3396
3397 // Make sure the copy ends up getting constrained properly.
3398 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
3399 AArch64::GPR32RegClass, MRI);
3400 }
3401
3402 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
3403 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
3404
3405 // Actually insert the instruction.
3406 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
3407 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
3408
3409 // Did we create a new register for the destination?
3410 if (DstReg != I.getOperand(0).getReg()) {
3411 // Yep. Copy the result of the instruction back into the original
3412 // destination.
3413 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
3414 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3415 AArch64::GPR32RegClass, MRI);
3416 }
3417
3418 I.eraseFromParent();
3419 return true;
3420 }
3421 return false;
3422}
3423
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003424/// SelectArithImmed - Select an immediate value that can be represented as
3425/// a 12-bit value shifted left by either 0 or 12. If so, return true with
3426/// Val set to the 12-bit value and Shift set to the shifter operand.
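/// For example, an immediate of 0x123 is rendered as (0x123, LSL #0), and
/// 0x123000 as (0x123, LSL #12); anything else (e.g. 0x123456) is rejected.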
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003427InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00003428AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003429 MachineInstr &MI = *Root.getParent();
3430 MachineBasicBlock &MBB = *MI.getParent();
3431 MachineFunction &MF = *MBB.getParent();
3432 MachineRegisterInfo &MRI = MF.getRegInfo();
3433
3434 // This function is called from the addsub_shifted_imm ComplexPattern,
3435 // which lists [imm] as the list of opcodes it's interested in. However,
3436 // we still need to check whether the operand is actually an immediate
3437 // here because the ComplexPattern opcode list is only used in
3438 // root-level opcode matching.
3439 uint64_t Immed;
3440 if (Root.isImm())
3441 Immed = Root.getImm();
3442 else if (Root.isCImm())
3443 Immed = Root.getCImm()->getZExtValue();
3444 else if (Root.isReg()) {
3445 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
3446 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003447 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003448 MachineOperand &Op1 = Def->getOperand(1);
3449 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003450 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00003451 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003452 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003453 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003454
3455 unsigned ShiftAmt;
3456
3457 if (Immed >> 12 == 0) {
3458 ShiftAmt = 0;
3459 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
3460 ShiftAmt = 12;
3461 Immed = Immed >> 12;
3462 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003463 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003464
3465 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00003466 return {{
3467 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
3468 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
3469 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00003470}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003471
Daniel Sandersea8711b2017-10-16 03:36:29 +00003472/// Select a "register plus unscaled signed 9-bit immediate" address. This
3473/// should only match when there is an offset that is not valid for a scaled
3474/// immediate addressing mode. The "Size" argument is the size in bytes of the
3475/// memory reference, which is needed here to know what is valid for a scaled
3476/// immediate.
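/// For example, for a 4-byte access a base-plus-constant offset of -4 or 3
/// would match here (the unscaled LDUR/STUR forms), whereas an offset of 8
/// would not, since it is representable as a scaled unsigned 12-bit offset.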
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003477InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003478AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
3479 unsigned Size) const {
3480 MachineRegisterInfo &MRI =
3481 Root.getParent()->getParent()->getParent()->getRegInfo();
3482
3483 if (!Root.isReg())
3484 return None;
3485
3486 if (!isBaseWithConstantOffset(Root, MRI))
3487 return None;
3488
3489 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3490 if (!RootDef)
3491 return None;
3492
3493 MachineOperand &OffImm = RootDef->getOperand(2);
3494 if (!OffImm.isReg())
3495 return None;
3496 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
3497 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
3498 return None;
3499 int64_t RHSC;
3500 MachineOperand &RHSOp1 = RHS->getOperand(1);
3501 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
3502 return None;
3503 RHSC = RHSOp1.getCImm()->getSExtValue();
3504
3505 // If the offset is valid as a scaled immediate, don't match here.
3506 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
3507 return None;
3508 if (RHSC >= -256 && RHSC < 256) {
3509 MachineOperand &Base = RootDef->getOperand(1);
3510 return {{
3511 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
3512 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
3513 }};
3514 }
3515 return None;
3516}
3517
3518/// Select a "register plus scaled unsigned 12-bit immediate" address. The
3519/// "Size" argument is the size in bytes of the memory reference, which
3520/// determines the scale.
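/// For example, for an 8-byte access a base-plus-constant offset of 32 renders
/// as an immediate of 4, since the immediate is scaled by the access size.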
Daniel Sanders1e4569f2017-10-20 20:55:29 +00003521InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00003522AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
3523 unsigned Size) const {
3524 MachineRegisterInfo &MRI =
3525 Root.getParent()->getParent()->getParent()->getRegInfo();
3526
3527 if (!Root.isReg())
3528 return None;
3529
3530 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
3531 if (!RootDef)
3532 return None;
3533
3534 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
3535 return {{
3536 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
3537 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3538 }};
3539 }
3540
3541 if (isBaseWithConstantOffset(Root, MRI)) {
3542 MachineOperand &LHS = RootDef->getOperand(1);
3543 MachineOperand &RHS = RootDef->getOperand(2);
3544 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
3545 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
3546 if (LHSDef && RHSDef) {
3547 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
3548 unsigned Scale = Log2_32(Size);
3549 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
3550 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00003551 return {{
3552 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
3553 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3554 }};
3555
Daniel Sandersea8711b2017-10-16 03:36:29 +00003556 return {{
3557 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
3558 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
3559 }};
3560 }
3561 }
3562 }
3563
3564 // Before falling back to our general case, check if the unscaled
3565 // instructions can handle this. If so, that's preferable.
3566 if (selectAddrModeUnscaled(Root, Size).hasValue())
3567 return None;
3568
3569 return {{
3570 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
3571 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
3572 }};
3573}
3574
Volkan Kelesf7f25682018-01-16 18:44:05 +00003575void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
3576 const MachineInstr &MI) const {
3577 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3578 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
3579 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
3580 assert(CstVal && "Expected constant value");
3581 MIB.addImm(CstVal.getValue());
3582}
3583
Daniel Sanders0b5293f2017-04-06 09:49:34 +00003584namespace llvm {
3585InstructionSelector *
3586createAArch64InstructionSelector(const AArch64TargetMachine &TM,
3587 AArch64Subtarget &Subtarget,
3588 AArch64RegisterBankInfo &RBI) {
3589 return new AArch64InstructionSelector(TM, Subtarget, RBI);
3590}
3591}