//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   unsigned Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<unsigned> DstReg, unsigned SrcReg,
                               unsigned EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<int> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
                                 unsigned Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     unsigned VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned char OpFlags) const;

  // Optimization methods.

  // Helper function to check if a reg def is an MI with a given opcode and
  // returns it if so.
  MachineInstr *findMIFromReg(unsigned Reg, unsigned Opc,
                              MachineIRBuilder &MIB) const {
    auto *Def = MIB.getMRI()->getVRegDef(Reg);
    if (!Def || Def->getOpcode() != Opc)
      return nullptr;
    return Def;
  }

  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the AArch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

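/// Select a COPY (or COPY-like generic instruction), fixing up cross-bank or
/// mismatched-size copies with subregister copies or SUBREG_TO_REG, and
/// constraining the destination register to a concrete register class.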
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getMinClassForRegBank(
      DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert(
        (I.isCopy() ||
         (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
          !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
        "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    const TargetRegisterClass *SrcRC = getMinClassForRegBank(
        SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (TargetRegisterInfo::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

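/// Map a G_ICMP predicate onto the equivalent AArch64 condition code, e.g.
/// ICMP_SGT -> GT and ICMP_ULT -> LO.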
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

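/// Map a G_FCMP predicate onto one or two AArch64 condition codes. \p CondCode2
/// is left as AL when a single condition suffices; unordered-aware predicates
/// such as FCMP_ONE and FCMP_UEQ need both.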
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

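/// Try to select a G_BRCOND whose condition comes from a G_ICMP against zero
/// as a single CBZ/CBNZ branch. Returns false if the pattern doesn't match,
/// so the generic G_BRCOND handling can take over.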
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const unsigned CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  unsigned LHS = CCMI->getOperand(2).getReg();
  unsigned RHS = CCMI->getOperand(3).getReg();
  if (!getConstantVRegVal(RHS, MRI))
    std::swap(RHS, LHS);

  const auto RHSImm = getConstantVRegVal(RHS, MRI);
  if (!RHSImm || *RHSImm != 0)
    return false;

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

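// va_start lowering for AAPCS targets is not implemented yet, so selection
// fails here and the backend falls back for such functions.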
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

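// On Darwin, va_start simply stores the address of the varargs stack area
// (the frame index recorded by the calling convention lowering) into the
// va_list pointer given by the first operand.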
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  unsigned ListReg = I.getOperand(0).getReg();

  unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

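// Materialize a GlobalValue or BlockAddress for the large code model as a
// MOVZ of the low 16 bits followed by three MOVKs for bits 16-31, 32-47 and
// 48-63.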
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned char OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
                       unsigned ForceDstReg) {
    unsigned DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

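/// Main entry point for instruction selection: handles PHIs, copies and other
/// non-generic instructions first, then defers to the TableGen-erated
/// selectImpl(), and finally falls back to the hand-written cases below.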
bool AArch64InstructionSelector::select(MachineInstr &I,
                                        CodeGenCoverage &CoverageInfo) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires same handling as PHI
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const unsigned DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const TargetRegisterClass *DefRC = nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
        DefRC = TRI.getRegClass(DefReg);
      } else {
        const RegClassOrRegBank &RegClassOrBank =
            MRI.getRegClassOrRegBank(DefReg);

        DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
        if (!DefRC) {
          if (!DefTy.isValid()) {
            LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
            return false;
          }
          const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
          DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
          if (!DefRC) {
            LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
            return false;
          }
        }
      }
      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  if (selectImpl(I, CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
      // bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const unsigned CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
              .addImm(AArch64CC::EQ)
              .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const unsigned DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    const unsigned MovOpc =
        DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;

    I.setDesc(TII.get(MovOpc));

    if (isFP) {
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
      ImmOp.ChangeToImmediate(
          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    }

    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    return true;
  }
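  // Scalar G_EXTRACT and G_INSERT are lowered to UBFM/BFM bitfield
  // instructions below; larger (vector) cases are rejected here.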
Tim Northover7b6d66c2017-07-20 22:58:38 +00001104 case TargetOpcode::G_EXTRACT: {
1105 LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001106 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Amara Emerson242efdb2018-02-18 17:28:34 +00001107 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001108 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001109 // Larger extracts are vectors, same-size extracts should be something else
1110 // by now (either split up or simplified to a COPY).
1111 if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
1112 return false;
1113
Amara Emersonbc03bae2018-02-18 17:03:02 +00001114 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001115 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1116 Ty.getSizeInBits() - 1);
1117
Amara Emersonbc03bae2018-02-18 17:03:02 +00001118 if (SrcSize < 64) {
1119 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1120 "unexpected G_EXTRACT types");
1121 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1122 }
1123
Tim Northover7b6d66c2017-07-20 22:58:38 +00001124 unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001125 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001126 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1127 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001128 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1129 AArch64::GPR32RegClass, MRI);
1130 I.getOperand(0).setReg(DstReg);
1131
1132 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1133 }
1134
1135 case TargetOpcode::G_INSERT: {
1136 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001137 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1138 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001139 // Larger inserts are vectors, same-size ones should be something else by
1140 // now (split up or turned into COPYs).
1141 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1142 return false;
1143
Amara Emersonbc03bae2018-02-18 17:03:02 +00001144 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001145 unsigned LSB = I.getOperand(3).getImm();
1146 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001147 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001148 MachineInstrBuilder(MF, I).addImm(Width - 1);
1149
Amara Emersonbc03bae2018-02-18 17:03:02 +00001150 if (DstSize < 64) {
1151 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1152 "unexpected G_INSERT types");
1153 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1154 }
1155
Tim Northover7b6d66c2017-07-20 22:58:38 +00001156 unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
1157 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1158 TII.get(AArch64::SUBREG_TO_REG))
1159 .addDef(SrcReg)
1160 .addImm(0)
1161 .addUse(I.getOperand(2).getReg())
1162 .addImm(AArch64::sub_32);
1163 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1164 AArch64::GPR32RegClass, MRI);
1165 I.getOperand(2).setReg(SrcReg);
1166
1167 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1168 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001169 case TargetOpcode::G_FRAME_INDEX: {
1170 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001171 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001172 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1173 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001174 return false;
1175 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001176 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001177
1178 // MOs for a #0 shifted immediate.
1179 I.addOperand(MachineOperand::CreateImm(0));
1180 I.addOperand(MachineOperand::CreateImm(0));
1181
1182 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1183 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001184
1185 case TargetOpcode::G_GLOBAL_VALUE: {
1186 auto GV = I.getOperand(1).getGlobal();
1187 if (GV->isThreadLocal()) {
1188 // FIXME: we don't support TLS yet.
1189 return false;
1190 }
1191 unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001192 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001193 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001194 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001195 } else if (TM.getCodeModel() == CodeModel::Large) {
1196 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001197 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001198 I.eraseFromParent();
1199 return true;
David Green9dd1d452018-08-22 11:31:39 +00001200 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1201 I.setDesc(TII.get(AArch64::ADR));
1202 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001203 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001204 I.setDesc(TII.get(AArch64::MOVaddr));
1205 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1206 MachineInstrBuilder MIB(MF, I);
1207 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1208 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1209 }
1210 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1211 }
1212
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001213 case TargetOpcode::G_LOAD:
1214 case TargetOpcode::G_STORE: {
Tim Northover0f140c72016-09-09 11:46:34 +00001215 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001216
Tim Northover5ae83502016-09-15 09:20:34 +00001217 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001218 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1219 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001220 return false;
1221 }
1222
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001223 auto &MemOp = **I.memoperands_begin();
1224 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001225 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001226 return false;
1227 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001228 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001229
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001230 const unsigned PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001231#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001232 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001233 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001234 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1235 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001236 assert(MRI.getType(PtrReg).isPointer() &&
1237 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001238#endif
1239
1240 const unsigned ValReg = I.getOperand(0).getReg();
1241 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1242
1243 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001244 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001245 if (NewOpc == I.getOpcode())
1246 return false;
1247
1248 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001249
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001250 uint64_t Offset = 0;
1251 auto *PtrMI = MRI.getVRegDef(PtrReg);
1252
1253 // Try to fold a GEP into our unsigned immediate addressing mode.
1254 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1255 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1256 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001257 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001258 const unsigned Scale = Log2_32(Size);
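        // The unsigned-offset addressing mode takes a 12-bit immediate that is
        // implicitly scaled by the access size, so the constant must be
        // non-negative, aligned to the access size, and small enough to encode.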
1259 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1260 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1261 I.getOperand(1).setReg(Ptr2Reg);
1262 PtrMI = MRI.getVRegDef(Ptr2Reg);
1263 Offset = Imm / Size;
1264 }
1265 }
1266 }
1267
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001268 // If we haven't folded anything into our addressing mode yet, try to fold
1269 // a frame index into the base+offset.
1270 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1271 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1272
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001273 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001274
1275 // If we're storing a 0, use WZR/XZR.
1276 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1277 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1278 if (I.getOpcode() == AArch64::STRWui)
1279 I.getOperand(0).setReg(AArch64::WZR);
1280 else if (I.getOpcode() == AArch64::STRXui)
1281 I.getOperand(0).setReg(AArch64::XZR);
1282 }
1283 }
1284
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001285 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1286 }
1287
Tim Northover9dd78f82017-02-08 21:22:25 +00001288 case TargetOpcode::G_SMULH:
1289 case TargetOpcode::G_UMULH: {
1290 // Reject the various things we don't support yet.
1291 if (unsupportedBinOp(I, RBI, MRI, TRI))
1292 return false;
1293
1294 const unsigned DefReg = I.getOperand(0).getReg();
1295 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1296
1297 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001298 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001299 return false;
1300 }
1301
1302 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001303 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1304 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001305 return false;
1306 }
1307
1308 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1309 : AArch64::UMULHrr;
1310 I.setDesc(TII.get(NewOpc));
1311
1312 // Now that we selected an opcode, we need to constrain the register
1313 // operands to use appropriate classes.
1314 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1315 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001316 case TargetOpcode::G_FADD:
1317 case TargetOpcode::G_FSUB:
1318 case TargetOpcode::G_FMUL:
1319 case TargetOpcode::G_FDIV:
1320
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001321 case TargetOpcode::G_OR:
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001322 case TargetOpcode::G_SHL:
1323 case TargetOpcode::G_LSHR:
1324 case TargetOpcode::G_ASHR:
Tim Northover2fda4b02016-10-10 21:49:49 +00001325 case TargetOpcode::G_GEP: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001326 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001327 if (unsupportedBinOp(I, RBI, MRI, TRI))
1328 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001329
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001330 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001331
1332 const unsigned DefReg = I.getOperand(0).getReg();
1333 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1334
1335 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1336 if (NewOpc == I.getOpcode())
1337 return false;
1338
1339 I.setDesc(TII.get(NewOpc));
1340 // FIXME: Should the type be always reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001341
1342 // Now that we selected an opcode, we need to constrain the register
1343 // operands to use appropriate classes.
1344 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1345 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001346
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001347 case TargetOpcode::G_UADDO: {
1348 // TODO: Support other types.
1349 unsigned OpSize = Ty.getSizeInBits();
1350 if (OpSize != 32 && OpSize != 64) {
1351 LLVM_DEBUG(
1352 dbgs()
1353 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1354 return false;
1355 }
1356
1357 // TODO: Support vectors.
1358 if (Ty.isVector()) {
1359 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1360 return false;
1361 }
1362
1363 // Emit the add and set the condition flags.
1364 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1365 MachineIRBuilder MIRBuilder(I);
1366 auto AddsMI = MIRBuilder.buildInstr(
1367 AddsOpc, {I.getOperand(0).getReg()},
1368 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1369 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1370
1371 // Now, put the overflow result in the register given by the first operand
1372 // to the G_UADDO. CSINC increments the result when the predicate is false,
1373 // so to get the increment when it's true, we need to use the inverse. In
1374 // this case, we want to increment when carry is set.
1375 auto CsetMI = MIRBuilder
1376 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
1377 {AArch64::WZR, AArch64::WZR})
1378 .addImm(getInvertedCondCode(AArch64CC::HS));
1379 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1380 I.eraseFromParent();
1381 return true;
1382 }
1383
Tim Northover398c5f52017-02-14 20:56:29 +00001384 case TargetOpcode::G_PTR_MASK: {
1385 uint64_t Align = I.getOperand(2).getImm();
1386 if (Align >= 64 || Align == 0)
1387 return false;
1388
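    // The immediate is the number of low pointer bits to clear; do that with an
    // AND of a logical immediate.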
1389 uint64_t Mask = ~((1ULL << Align) - 1);
1390 I.setDesc(TII.get(AArch64::ANDXri));
1391 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1392
1393 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1394 }
Tim Northover037af52c2016-10-31 18:31:09 +00001395 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001396 case TargetOpcode::G_TRUNC: {
1397 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1398 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1399
1400 const unsigned DstReg = I.getOperand(0).getReg();
1401 const unsigned SrcReg = I.getOperand(1).getReg();
1402
1403 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1404 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1405
1406 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001407 LLVM_DEBUG(
1408 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001409 return false;
1410 }
1411
1412 if (DstRB.getID() == AArch64::GPRRegBankID) {
1413 const TargetRegisterClass *DstRC =
1414 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1415 if (!DstRC)
1416 return false;
1417
1418 const TargetRegisterClass *SrcRC =
1419 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1420 if (!SrcRC)
1421 return false;
1422
1423 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1424 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001425 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001426 return false;
1427 }
1428
1429 if (DstRC == SrcRC) {
1430 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001431 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1432 SrcTy == LLT::scalar(64)) {
1433 llvm_unreachable("TableGen can import this case");
1434 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001435 } else if (DstRC == &AArch64::GPR32RegClass &&
1436 SrcRC == &AArch64::GPR64RegClass) {
1437 I.getOperand(1).setSubReg(AArch64::sub_32);
1438 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001439 LLVM_DEBUG(
1440 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001441 return false;
1442 }
1443
1444 I.setDesc(TII.get(TargetOpcode::COPY));
1445 return true;
1446 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1447 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1448 I.setDesc(TII.get(AArch64::XTNv4i16));
1449 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1450 return true;
1451 }
1452 }
1453
1454 return false;
1455 }
1456
Tim Northover3d38b3a2016-10-11 20:50:21 +00001457 case TargetOpcode::G_ANYEXT: {
1458 const unsigned DstReg = I.getOperand(0).getReg();
1459 const unsigned SrcReg = I.getOperand(1).getReg();
1460
Quentin Colombetcb629a82016-10-12 03:57:49 +00001461 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
1462 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001463 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
1464 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00001465 return false;
1466 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001467
Quentin Colombetcb629a82016-10-12 03:57:49 +00001468 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
1469 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001470 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
1471 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001472 return false;
1473 }
1474
1475 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
1476
1477 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001478 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001479 return false;
1480 }
1481
Quentin Colombetcb629a82016-10-12 03:57:49 +00001482 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001483 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
1484 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001485 return false;
1486 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001487 // At this point G_ANYEXT is just like a plain COPY, but we need
1488 // to explicitly form the 64-bit value when widening to 64 bits.
1489 if (DstSize > 32) {
1490 unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
1491 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1492 .addDef(ExtSrc)
1493 .addImm(0)
1494 .addUse(SrcReg)
1495 .addImm(AArch64::sub_32);
1496 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001497 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00001498 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00001499 }
1500
1501 case TargetOpcode::G_ZEXT:
1502 case TargetOpcode::G_SEXT: {
1503 unsigned Opcode = I.getOpcode();
1504 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1505 SrcTy = MRI.getType(I.getOperand(1).getReg());
1506 const bool isSigned = Opcode == TargetOpcode::G_SEXT;
1507 const unsigned DefReg = I.getOperand(0).getReg();
1508 const unsigned SrcReg = I.getOperand(1).getReg();
1509 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1510
1511 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001512 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
1513 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001514 return false;
1515 }
1516
1517 MachineInstr *ExtI;
1518 if (DstTy == LLT::scalar(64)) {
1519 // FIXME: Can we avoid manually doing this?
1520 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001521 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
1522 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00001523 return false;
1524 }
1525
1526 const unsigned SrcXReg =
1527 MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1528 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
1529 .addDef(SrcXReg)
1530 .addImm(0)
1531 .addUse(SrcReg)
1532 .addImm(AArch64::sub_32);
1533
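      // With immr = 0 and imms = SrcSize - 1, SBFM/UBFM sign- or zero-extends
      // the low SrcSize bits of the source into the 64-bit destination.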
1534 const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
1535 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1536 .addDef(DefReg)
1537 .addUse(SrcXReg)
1538 .addImm(0)
1539 .addImm(SrcTy.getSizeInBits() - 1);
Tim Northovera9105be2016-11-09 22:39:54 +00001540 } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00001541 const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
1542 ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
1543 .addDef(DefReg)
1544 .addUse(SrcReg)
1545 .addImm(0)
1546 .addImm(SrcTy.getSizeInBits() - 1);
1547 } else {
1548 return false;
1549 }
1550
1551 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
1552
1553 I.eraseFromParent();
1554 return true;
1555 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001556
Tim Northover69271c62016-10-12 22:49:11 +00001557 case TargetOpcode::G_SITOFP:
1558 case TargetOpcode::G_UITOFP:
1559 case TargetOpcode::G_FPTOSI:
1560 case TargetOpcode::G_FPTOUI: {
1561 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
1562 SrcTy = MRI.getType(I.getOperand(1).getReg());
1563 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
1564 if (NewOpc == Opcode)
1565 return false;
1566
1567 I.setDesc(TII.get(NewOpc));
1568 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1569
1570 return true;
1571 }
1572
1573
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00001574 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00001575 // The importer is currently unable to import pointer types since they
1576 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00001577 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00001578
Daniel Sandersedd07842017-08-17 09:26:14 +00001579 case TargetOpcode::G_BITCAST:
1580 // Imported SelectionDAG rules can handle every bitcast except those that
1581 // bitcast from a type to the same type. Ideally, these shouldn't occur
1582 // but we might not run an optimizer that deletes them.
1583 if (MRI.getType(I.getOperand(0).getReg()) ==
1584 MRI.getType(I.getOperand(1).getReg()))
1585 return selectCopy(I, TII, MRI, TRI, RBI);
1586 return false;
1587
Tim Northover9ac0eba2016-11-08 00:45:29 +00001588 case TargetOpcode::G_SELECT: {
1589 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001590 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
1591 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00001592 return false;
1593 }
1594
1595 const unsigned CondReg = I.getOperand(1).getReg();
1596 const unsigned TReg = I.getOperand(2).getReg();
1597 const unsigned FReg = I.getOperand(3).getReg();
1598
1599 unsigned CSelOpc = 0;
1600
1601 if (Ty == LLT::scalar(32)) {
1602 CSelOpc = AArch64::CSELWr;
Kristof Beylse9412b42017-01-19 13:32:14 +00001603 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
Tim Northover9ac0eba2016-11-08 00:45:29 +00001604 CSelOpc = AArch64::CSELXr;
1605 } else {
1606 return false;
1607 }
1608
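    // Test bit 0 of the condition; the CSEL below then picks TReg when the bit
    // is set (NE) and FReg otherwise.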
1609 MachineInstr &TstMI =
1610 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1611 .addDef(AArch64::WZR)
1612 .addUse(CondReg)
1613 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1614
1615 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
1616 .addDef(I.getOperand(0).getReg())
1617 .addUse(TReg)
1618 .addUse(FReg)
1619 .addImm(AArch64CC::NE);
1620
1621 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
1622 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
1623
1624 I.eraseFromParent();
1625 return true;
1626 }
Tim Northover6c02ad52016-10-12 22:49:04 +00001627 case TargetOpcode::G_ICMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001628 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001629 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
1630 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00001631 return false;
1632 }
1633
1634 unsigned CmpOpc = 0;
1635 unsigned ZReg = 0;
1636
1637 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1638 if (CmpTy == LLT::scalar(32)) {
1639 CmpOpc = AArch64::SUBSWrr;
1640 ZReg = AArch64::WZR;
1641 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
1642 CmpOpc = AArch64::SUBSXrr;
1643 ZReg = AArch64::XZR;
1644 } else {
1645 return false;
1646 }
1647
Kristof Beyls22524402017-01-05 10:16:08 +00001648 // CSINC increments the result by one when the condition code is false.
1649 // Therefore, we have to invert the predicate to get an increment by 1 when
1650 // the predicate is true.
1651 const AArch64CC::CondCode invCC =
1652 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
1653 (CmpInst::Predicate)I.getOperand(1).getPredicate()));
Tim Northover6c02ad52016-10-12 22:49:04 +00001654
1655 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1656 .addDef(ZReg)
1657 .addUse(I.getOperand(2).getReg())
1658 .addUse(I.getOperand(3).getReg());
1659
1660 MachineInstr &CSetMI =
1661 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1662 .addDef(I.getOperand(0).getReg())
1663 .addUse(AArch64::WZR)
1664 .addUse(AArch64::WZR)
Kristof Beyls22524402017-01-05 10:16:08 +00001665 .addImm(invCC);
Tim Northover6c02ad52016-10-12 22:49:04 +00001666
1667 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1668 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1669
1670 I.eraseFromParent();
1671 return true;
1672 }
1673
Tim Northover7dd378d2016-10-12 22:49:07 +00001674 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00001675 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001676 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
1677 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00001678 return false;
1679 }
1680
1681 unsigned CmpOpc = 0;
1682 LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
1683 if (CmpTy == LLT::scalar(32)) {
1684 CmpOpc = AArch64::FCMPSrr;
1685 } else if (CmpTy == LLT::scalar(64)) {
1686 CmpOpc = AArch64::FCMPDrr;
1687 } else {
1688 return false;
1689 }
1690
1691 // FIXME: regbank
1692
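    // Some FP predicates (e.g. ONE, UEQ) map to two AArch64 condition codes.
    // Each one is materialized with a CSINC below and the results are ORed.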
1693 AArch64CC::CondCode CC1, CC2;
1694 changeFCMPPredToAArch64CC(
1695 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
1696
1697 MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
1698 .addUse(I.getOperand(2).getReg())
1699 .addUse(I.getOperand(3).getReg());
1700
1701 const unsigned DefReg = I.getOperand(0).getReg();
1702 unsigned Def1Reg = DefReg;
1703 if (CC2 != AArch64CC::AL)
1704 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1705
1706 MachineInstr &CSetMI =
1707 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1708 .addDef(Def1Reg)
1709 .addUse(AArch64::WZR)
1710 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001711 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00001712
1713 if (CC2 != AArch64CC::AL) {
1714 unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1715 MachineInstr &CSet2MI =
1716 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
1717 .addDef(Def2Reg)
1718 .addUse(AArch64::WZR)
1719 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00001720 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00001721 MachineInstr &OrMI =
1722 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
1723 .addDef(DefReg)
1724 .addUse(Def1Reg)
1725 .addUse(Def2Reg);
1726 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
1727 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
1728 }
1729
1730 constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
1731 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
1732
1733 I.eraseFromParent();
1734 return true;
1735 }
Tim Northovere9600d82017-02-08 17:57:27 +00001736 case TargetOpcode::G_VASTART:
1737 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
1738 : selectVaStartAAPCS(I, MF, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00001739 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00001740 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001741 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00001742 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00001743 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1744 const unsigned DstReg = I.getOperand(0).getReg();
1745 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1746 const TargetRegisterClass *DstRC =
1747 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1748 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00001749 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001750 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00001751 case TargetOpcode::G_BLOCK_ADDR: {
1752 if (TM.getCodeModel() == CodeModel::Large) {
1753 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
1754 I.eraseFromParent();
1755 return true;
1756 } else {
1757 I.setDesc(TII.get(AArch64::MOVaddrBA));
1758 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
1759 I.getOperand(0).getReg())
1760 .addBlockAddress(I.getOperand(1).getBlockAddress(),
1761 /* Offset */ 0, AArch64II::MO_PAGE)
1762 .addBlockAddress(
1763 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
1764 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
1765 I.eraseFromParent();
1766 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
1767 }
1768 }
Amara Emerson5ec14602018-12-10 18:44:58 +00001769 case TargetOpcode::G_BUILD_VECTOR:
1770 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00001771 case TargetOpcode::G_MERGE_VALUES:
1772 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00001773 case TargetOpcode::G_UNMERGE_VALUES:
1774 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00001775 case TargetOpcode::G_SHUFFLE_VECTOR:
1776 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00001777 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1778 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00001779 case TargetOpcode::G_INSERT_VECTOR_ELT:
1780 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00001781 case TargetOpcode::G_CONCAT_VECTORS:
1782 return selectConcatVectors(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00001783 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001784
1785 return false;
1786}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00001787
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001788MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Amara Emerson8acb0d92019-03-04 19:16:00 +00001789 unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001790 MachineIRBuilder &MIRBuilder) const {
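  // Start from an IMPLICIT_DEF of the destination class and insert the scalar
  // into its low subregister (hsub/ssub/dsub, picked by element size).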
1791 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00001792
1793 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001794 auto Ins =
1795 MIRBuilder
1796 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
1797 .addImm(SubregIndex);
1798 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
1799 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
1800 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00001801 };
1802
Amara Emerson8acb0d92019-03-04 19:16:00 +00001803 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00001804 case 16:
1805 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00001806 case 32:
1807 return BuildFn(AArch64::ssub);
1808 case 64:
1809 return BuildFn(AArch64::dsub);
1810 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00001811 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00001812 }
1813}
1814
Amara Emerson8cb186c2018-12-20 01:11:04 +00001815bool AArch64InstructionSelector::selectMergeValues(
1816 MachineInstr &I, MachineRegisterInfo &MRI) const {
1817 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
1818 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1819 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1820 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
1821
1822 // At the moment we only support merging two s32s into an s64.
1823 if (I.getNumOperands() != 3)
1824 return false;
1825 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
1826 return false;
1827 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
1828 if (RB.getID() != AArch64::GPRRegBankID)
1829 return false;
1830
1831 auto *DstRC = &AArch64::GPR64RegClass;
1832 unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
1833 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1834 TII.get(TargetOpcode::SUBREG_TO_REG))
1835 .addDef(SubToRegDef)
1836 .addImm(0)
1837 .addUse(I.getOperand(1).getReg())
1838 .addImm(AArch64::sub_32);
1839 unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
1840 // Need to anyext the second scalar before we can use bfm
1841 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1842 TII.get(TargetOpcode::SUBREG_TO_REG))
1843 .addDef(SubToRegDef2)
1844 .addImm(0)
1845 .addUse(I.getOperand(2).getReg())
1846 .addImm(AArch64::sub_32);
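  // BFM with immr = 32 and imms = 31 inserts the low 32 bits of the second
  // value into bits [63:32] of the result; the first value stays in the low
  // half.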
Amara Emerson8cb186c2018-12-20 01:11:04 +00001847 MachineInstr &BFM =
1848 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00001849 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00001850 .addUse(SubToRegDef)
1851 .addUse(SubToRegDef2)
1852 .addImm(32)
1853 .addImm(31);
1854 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
1855 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
1856 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
1857 I.eraseFromParent();
1858 return true;
1859}
1860
Jessica Paquette607774c2019-03-11 22:18:01 +00001861static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
1862 const unsigned EltSize) {
1863 // Choose a lane copy opcode and subregister based off of the size of the
1864 // vector's elements.
1865 switch (EltSize) {
1866 case 16:
1867 CopyOpc = AArch64::CPYi16;
1868 ExtractSubReg = AArch64::hsub;
1869 break;
1870 case 32:
1871 CopyOpc = AArch64::CPYi32;
1872 ExtractSubReg = AArch64::ssub;
1873 break;
1874 case 64:
1875 CopyOpc = AArch64::CPYi64;
1876 ExtractSubReg = AArch64::dsub;
1877 break;
1878 default:
1879 // Unknown size, bail out.
1880 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
1881 return false;
1882 }
1883 return true;
1884}
1885
Jessica Paquettebb1aced2019-03-13 21:19:29 +00001886/// Given a register \p Reg, find the value of a constant defining \p Reg.
1887/// Return true if one could be found, and store it in \p Val. Return false
1888/// otherwise.
1889static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
1890 unsigned &Val) {
1891 // Look at the def of the register.
1892 MachineInstr *Def = MRI.getVRegDef(Reg);
1893 if (!Def)
1894 return false;
1895
1896 // Find the first definition which isn't a copy.
1897 if (Def->isCopy()) {
1898 Reg = Def->getOperand(1).getReg();
1899 auto It = find_if_not(MRI.reg_nodbg_instructions(Reg),
1900 [](const MachineInstr &MI) { return MI.isCopy(); });
1901 if (It == MRI.reg_instr_nodbg_end()) {
1902 LLVM_DEBUG(dbgs() << "Couldn't find non-copy def for register\n");
1903 return false;
1904 }
1905 Def = &*It;
1906 }
1907
1908 // TODO: Handle opcodes other than G_CONSTANT.
1909 if (Def->getOpcode() != TargetOpcode::G_CONSTANT) {
1910 LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
1911 "currently unsupported.\n");
1912 return false;
1913 }
1914
1915 // Return the constant value associated with the operand.
1916 Val = Def->getOperand(1).getCImm()->getLimitedValue();
1917 return true;
1918}
1919
Amara Emersond61b89b2019-03-14 22:48:18 +00001920MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
1921 Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
1922 unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
1923 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1924 unsigned CopyOpc = 0;
1925 unsigned ExtractSubReg = 0;
1926 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
1927 LLVM_DEBUG(
1928 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
1929 return nullptr;
1930 }
1931
1932 const TargetRegisterClass *DstRC =
1933 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
1934 if (!DstRC) {
1935 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
1936 return nullptr;
1937 }
1938
1939 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
1940 const LLT &VecTy = MRI.getType(VecReg);
1941 const TargetRegisterClass *VecRC =
1942 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
1943 if (!VecRC) {
1944 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
1945 return nullptr;
1946 }
1947
1948 // The register that we're going to copy into.
1949 unsigned InsertReg = VecReg;
1950 if (!DstReg)
1951 DstReg = MRI.createVirtualRegister(DstRC);
1952 // If the lane index is 0, we just use a subregister COPY.
1953 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00001954 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
1955 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001956 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00001957 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00001958 }
1959
1960 // Lane copies require 128-bit wide registers. If we're dealing with an
1961 // unpacked vector, then we need to move up to that width. Insert an implicit
1962 // def and a subregister insert to get us there.
1963 if (VecTy.getSizeInBits() != 128) {
1964 MachineInstr *ScalarToVector = emitScalarToVector(
1965 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
1966 if (!ScalarToVector)
1967 return nullptr;
1968 InsertReg = ScalarToVector->getOperand(0).getReg();
1969 }
1970
1971 MachineInstr *LaneCopyMI =
1972 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
1973 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
1974
1975 // Make sure that we actually constrain the initial copy.
1976 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
1977 return LaneCopyMI;
1978}
1979
Jessica Paquette607774c2019-03-11 22:18:01 +00001980bool AArch64InstructionSelector::selectExtractElt(
1981 MachineInstr &I, MachineRegisterInfo &MRI) const {
1982 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
1983 "unexpected opcode!");
1984 unsigned DstReg = I.getOperand(0).getReg();
1985 const LLT NarrowTy = MRI.getType(DstReg);
1986 const unsigned SrcReg = I.getOperand(1).getReg();
1987 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00001988 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00001989 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
1990 "source register size too small!");
1991 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
1992
1993 // Need the lane index to determine the correct copy opcode.
1994 MachineOperand &LaneIdxOp = I.getOperand(2);
1995 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
1996
1997 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
1998 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
1999 return false;
2000 }
2001
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002002 // Find the index to extract from.
2003 unsigned LaneIdx = 0;
2004 if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
Jessica Paquette607774c2019-03-11 22:18:01 +00002005 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002006
Jessica Paquette607774c2019-03-11 22:18:01 +00002007 MachineIRBuilder MIRBuilder(I);
2008
Amara Emersond61b89b2019-03-14 22:48:18 +00002009 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2010 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2011 LaneIdx, MIRBuilder);
2012 if (!Extract)
2013 return false;
2014
2015 I.eraseFromParent();
2016 return true;
2017}
2018
2019bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2020 MachineInstr &I, MachineRegisterInfo &MRI) const {
2021 unsigned NumElts = I.getNumOperands() - 1;
2022 unsigned SrcReg = I.getOperand(NumElts).getReg();
2023 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2024 const LLT SrcTy = MRI.getType(SrcReg);
2025
2026 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2027 if (SrcTy.getSizeInBits() > 128) {
2028 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge\n");
2029 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002030 }
2031
Amara Emersond61b89b2019-03-14 22:48:18 +00002032 MachineIRBuilder MIB(I);
2033
2034 // We implement a split vector operation by treating the sub-vectors as
2035 // scalars and extracting them.
2036 const RegisterBank &DstRB =
2037 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2038 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
2039 unsigned Dst = I.getOperand(OpIdx).getReg();
2040 MachineInstr *Extract =
2041 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2042 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002043 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002044 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002045 I.eraseFromParent();
2046 return true;
2047}
2048
Jessica Paquette245047d2019-01-24 22:00:41 +00002049bool AArch64InstructionSelector::selectUnmergeValues(
2050 MachineInstr &I, MachineRegisterInfo &MRI) const {
2051 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2052 "unexpected opcode");
2053
2054 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2055 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2056 AArch64::FPRRegBankID ||
2057 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2058 AArch64::FPRRegBankID) {
2059 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2060 "currently unsupported.\n");
2061 return false;
2062 }
2063
2064 // The last operand is the vector source register, and every other operand is
2065 // a register to unpack into.
2066 unsigned NumElts = I.getNumOperands() - 1;
2067 unsigned SrcReg = I.getOperand(NumElts).getReg();
2068 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2069 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002070 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002071 assert(WideTy.isVector() && "can only unmerge from vector types!");
2072 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2073 "source register size too small!");
2074
Amara Emersond61b89b2019-03-14 22:48:18 +00002075 if (!NarrowTy.isScalar())
2076 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002077
Amara Emerson3739a202019-03-15 21:59:50 +00002078 MachineIRBuilder MIB(I);
2079
Jessica Paquette245047d2019-01-24 22:00:41 +00002080 // Choose a lane copy opcode and subregister based off of the size of the
2081 // vector's elements.
2082 unsigned CopyOpc = 0;
2083 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002084 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002085 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002086
2087 // Set up for the lane copies.
2088 MachineBasicBlock &MBB = *I.getParent();
2089
2090 // Stores the registers we'll be copying from.
2091 SmallVector<unsigned, 4> InsertRegs;
2092
2093 // We'll use the first register twice, so we only need NumElts-1 registers.
2094 unsigned NumInsertRegs = NumElts - 1;
2095
2096 // If our elements fit into exactly 128 bits, then we can copy from the source
2097 // directly. Otherwise, we need to do a bit of setup with some subregister
2098 // inserts.
2099 if (NarrowTy.getSizeInBits() * NumElts == 128) {
2100 InsertRegs = SmallVector<unsigned, 4>(NumInsertRegs, SrcReg);
2101 } else {
2102 // No. We have to perform subregister inserts. For each insert, create an
2103 // implicit def and a subregister insert, and save the register we create.
2104 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
2105 unsigned ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2106 MachineInstr &ImpDefMI =
2107 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2108 ImpDefReg);
2109
2110 // Now, create the subregister insert from SrcReg.
2111 unsigned InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
2112 MachineInstr &InsMI =
2113 *BuildMI(MBB, I, I.getDebugLoc(),
2114 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2115 .addUse(ImpDefReg)
2116 .addUse(SrcReg)
2117 .addImm(AArch64::dsub);
2118
2119 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2120 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2121
2122 // Save the register so that we can copy from it after.
2123 InsertRegs.push_back(InsertReg);
2124 }
2125 }
2126
2127 // Now that we've created any necessary subregister inserts, we can
2128 // create the copies.
2129 //
2130 // Perform the first copy separately as a subregister copy.
2131 unsigned CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002132 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2133 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002134 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002135
2136 // Now, perform the remaining copies as vector lane copies.
2137 unsigned LaneIdx = 1;
2138 for (unsigned InsReg : InsertRegs) {
2139 unsigned CopyTo = I.getOperand(LaneIdx).getReg();
2140 MachineInstr &CopyInst =
2141 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2142 .addUse(InsReg)
2143 .addImm(LaneIdx);
2144 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2145 ++LaneIdx;
2146 }
2147
2148 // Separately constrain the first copy's destination. Because of the
2149 // limitation in constrainOperandRegClass, we can't guarantee that this will
2150 // actually be constrained. So, do it ourselves using the second operand.
2151 const TargetRegisterClass *RC =
2152 MRI.getRegClassOrNull(I.getOperand(1).getReg());
2153 if (!RC) {
2154 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
2155 return false;
2156 }
2157
2158 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
2159 I.eraseFromParent();
2160 return true;
2161}
2162
Amara Emerson2ff22982019-03-14 22:48:15 +00002163bool AArch64InstructionSelector::selectConcatVectors(
2164 MachineInstr &I, MachineRegisterInfo &MRI) const {
2165 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
2166 "Unexpected opcode");
2167 unsigned Dst = I.getOperand(0).getReg();
2168 unsigned Op1 = I.getOperand(1).getReg();
2169 unsigned Op2 = I.getOperand(2).getReg();
2170 MachineIRBuilder MIRBuilder(I);
2171 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
2172 if (!ConcatMI)
2173 return false;
2174 I.eraseFromParent();
2175 return true;
2176}
2177
Amara Emerson1abe05c2019-02-21 20:20:16 +00002178void AArch64InstructionSelector::collectShuffleMaskIndices(
2179 MachineInstr &I, MachineRegisterInfo &MRI,
2180 SmallVectorImpl<int> &Idxs) const {
2181 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
2182 assert(
2183 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
2184 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
2185 // Find the constant indices.
2186 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
2187 MachineInstr *ScalarDef = MRI.getVRegDef(MaskDef->getOperand(i).getReg());
2188 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
2189 // Look through copies.
2190 while (ScalarDef->getOpcode() == TargetOpcode::COPY) {
2191 ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
2192 assert(ScalarDef && "Could not find def of copy operand");
2193 }
2194 assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT);
2195 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2196 }
2197}
2198
2199unsigned
2200AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
2201 MachineFunction &MF) const {
2202 Type *CPTy = CPVal->getType();
2203 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
2204 if (Align == 0)
2205 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
2206
2207 MachineConstantPool *MCP = MF.getConstantPool();
2208 return MCP->getConstantPoolIndex(CPVal, Align);
2209}
2210
2211MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
2212 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
2213 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
2214
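  // Address the constant pool entry as ADRP (page address) plus the low 12
  // bits folded into the load below via MO_PAGEOFF.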
2215 auto Adrp =
2216 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
2217 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002218
2219 MachineInstr *LoadMI = nullptr;
2220 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
2221 case 16:
2222 LoadMI =
2223 &*MIRBuilder
2224 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
2225 .addConstantPoolIndex(CPIdx, 0,
2226 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2227 break;
2228 case 8:
2229 LoadMI = &*MIRBuilder
2230 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
2231 .addConstantPoolIndex(
2232 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2233 break;
2234 default:
2235 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
2236 << *CPVal->getType());
2237 return nullptr;
2238 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00002239 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002240 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
2241 return LoadMI;
2242}
2243
2244/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
2245/// size and RB.
2246static std::pair<unsigned, unsigned>
2247getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
2248 unsigned Opc, SubregIdx;
2249 if (RB.getID() == AArch64::GPRRegBankID) {
2250 if (EltSize == 32) {
2251 Opc = AArch64::INSvi32gpr;
2252 SubregIdx = AArch64::ssub;
2253 } else if (EltSize == 64) {
2254 Opc = AArch64::INSvi64gpr;
2255 SubregIdx = AArch64::dsub;
2256 } else {
2257 llvm_unreachable("invalid elt size!");
2258 }
2259 } else {
2260 if (EltSize == 8) {
2261 Opc = AArch64::INSvi8lane;
2262 SubregIdx = AArch64::bsub;
2263 } else if (EltSize == 16) {
2264 Opc = AArch64::INSvi16lane;
2265 SubregIdx = AArch64::hsub;
2266 } else if (EltSize == 32) {
2267 Opc = AArch64::INSvi32lane;
2268 SubregIdx = AArch64::ssub;
2269 } else if (EltSize == 64) {
2270 Opc = AArch64::INSvi64lane;
2271 SubregIdx = AArch64::dsub;
2272 } else {
2273 llvm_unreachable("invalid elt size!");
2274 }
2275 }
2276 return std::make_pair(Opc, SubregIdx);
2277}
2278
2279MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Amara Emerson2ff22982019-03-14 22:48:15 +00002280 Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
2281 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00002282 // We implement a vector concat by:
2283 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2284 // 2. Insert the upper vector into the destination's upper element
2285 // TODO: some of this code is common with G_BUILD_VECTOR handling.
2286 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
2287
2288 const LLT Op1Ty = MRI.getType(Op1);
2289 const LLT Op2Ty = MRI.getType(Op2);
2290
2291 if (Op1Ty != Op2Ty) {
2292 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys\n");
2293 return nullptr;
2294 }
2295 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
2296
2297 if (Op1Ty.getSizeInBits() >= 128) {
2298 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors\n");
2299 return nullptr;
2300 }
2301
2302 // At the moment we just support 64 bit vector concats.
2303 if (Op1Ty.getSizeInBits() != 64) {
2304 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors\n");
2305 return nullptr;
2306 }
2307
2308 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
2309 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
2310 const TargetRegisterClass *DstRC =
2311 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
2312
2313 MachineInstr *WidenedOp1 =
2314 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
2315 MachineInstr *WidenedOp2 =
2316 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
2317 if (!WidenedOp1 || !WidenedOp2) {
2318 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value\n");
2319 return nullptr;
2320 }
2321
2322 // Now do the insert of the upper element.
2323 unsigned InsertOpc, InsSubRegIdx;
2324 std::tie(InsertOpc, InsSubRegIdx) =
2325 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
2326
Amara Emerson2ff22982019-03-14 22:48:15 +00002327 if (!Dst)
2328 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002329 auto InsElt =
2330 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00002331 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00002332 .addImm(1) /* Lane index */
2333 .addUse(WidenedOp2->getOperand(0).getReg())
2334 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002335 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2336 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002337}
2338
Amara Emerson761ca2e2019-03-19 21:43:05 +00002339bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
2340 // Try to match a vector splat operation into a dup instruction.
2341 // We're looking for this pattern:
2342 // %scalar:gpr(s64) = COPY $x0
2343 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
2344 // %cst0:gpr(s32) = G_CONSTANT i32 0
2345 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
2346 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
2347 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
2348 // %zerovec(<2 x s32>)
2349 //
2350 // ...into:
2351 // %splat = DUP %scalar
2352 // We use the regbank of the scalar to determine which kind of dup to use.
2353 MachineIRBuilder MIB(I);
2354 MachineRegisterInfo &MRI = *MIB.getMRI();
2355 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2356 using namespace TargetOpcode;
2357 using namespace MIPatternMatch;
2358
2359 // Begin matching the insert.
2360 auto *InsMI =
2361 findMIFromReg(I.getOperand(1).getReg(), G_INSERT_VECTOR_ELT, MIB);
2362 if (!InsMI)
2363 return false;
2364 // Match the undef vector operand.
2365 auto *UndefMI =
2366 findMIFromReg(InsMI->getOperand(1).getReg(), G_IMPLICIT_DEF, MIB);
2367 if (!UndefMI)
2368 return false;
2369 // Match the scalar being splatted.
2370 unsigned ScalarReg = InsMI->getOperand(2).getReg();
2371 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
2372 // Match the index constant 0.
2373 int64_t Index = 0;
2374 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
2375 return false;
2376
2377 // The shuffle's second operand doesn't matter if the mask is all zero.
2378 auto *ZeroVec = findMIFromReg(I.getOperand(3).getReg(), G_BUILD_VECTOR, MIB);
2379 if (!ZeroVec)
2380 return false;
2381 int64_t Zero = 0;
2382 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
2383 return false;
2384 for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
2385 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
2386 return false; // This wasn't an all zeros vector.
2387 }
2388
2389 // We're done, now find out what kind of splat we need.
2390 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
2391 LLT EltTy = VecTy.getElementType();
2392 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
2393 LLVM_DEBUG(dbgs() << "Could not optimize splat: need a 128b vector with >= 32b elements\n");
2394 return false;
2395 }
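  // Pick the DUP variant: the first index selects a GPR- or lane-sourced DUP,
  // the second selects 32- or 64-bit elements.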
2396 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
2397 static const unsigned OpcTable[2][2] = {
2398 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
2399 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
2400 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
2401
2402 // For FP splats, we need to widen the scalar reg via undef too.
2403 if (IsFP) {
2404 MachineInstr *Widen = emitScalarToVector(
2405 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
2406 if (!Widen)
2407 return false;
2408 ScalarReg = Widen->getOperand(0).getReg();
2409 }
2410 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
2411 if (IsFP)
2412 Dup.addImm(0);
2413 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
2414 I.eraseFromParent();
2415 return true;
2416}
2417
2418bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
2419 if (TM.getOptLevel() == CodeGenOpt::None)
2420 return false;
2421 if (tryOptVectorDup(I))
2422 return true;
2423 return false;
2424}
2425
Amara Emerson1abe05c2019-02-21 20:20:16 +00002426bool AArch64InstructionSelector::selectShuffleVector(
2427 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00002428 if (tryOptVectorShuffle(I))
2429 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00002430 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2431 unsigned Src1Reg = I.getOperand(1).getReg();
2432 const LLT Src1Ty = MRI.getType(Src1Reg);
2433 unsigned Src2Reg = I.getOperand(2).getReg();
2434 const LLT Src2Ty = MRI.getType(Src2Reg);
2435
2436 MachineBasicBlock &MBB = *I.getParent();
2437 MachineFunction &MF = *MBB.getParent();
2438 LLVMContext &Ctx = MF.getFunction().getContext();
2439
2440 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2441 // operand; it comes in as a normal vector value which we have to analyze to
2442 // find the mask indices.
2443 SmallVector<int, 8> Mask;
2444 collectShuffleMaskIndices(I, MRI, Mask);
2445 assert(!Mask.empty() && "Expected to find mask indices");
2446
2447 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
2448 // it's originated from a <1 x T> type. Those should have been lowered into
2449 // G_BUILD_VECTOR earlier.
2450 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
2451 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
2452 return false;
2453 }
2454
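  // TBL indexes individual bytes, so each lane index is expanded into
  // BytesPerElt consecutive byte indices below.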
2455 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
2456
2457 SmallVector<Constant *, 64> CstIdxs;
2458 for (int Val : Mask) {
2459 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
2460 unsigned Offset = Byte + Val * BytesPerElt;
2461 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
2462 }
2463 }
2464
Amara Emerson8acb0d92019-03-04 19:16:00 +00002465 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002466
2467 // Use a constant pool to load the index vector for TBL.
2468 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002469 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
2470 if (!IndexLoad) {
2471 LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
2472 return false;
2473 }
2474
Amara Emerson8acb0d92019-03-04 19:16:00 +00002475 if (DstTy.getSizeInBits() != 128) {
2476 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
2477 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00002478 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002479 if (!Concat) {
2480 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
2481 return false;
2482 }
2483
2484 // The constant pool load will be 64 bits, so we need to convert it to an FPR128 reg.
2485 IndexLoad =
2486 emitScalarToVector(64, &AArch64::FPR128RegClass,
2487 IndexLoad->getOperand(0).getReg(), MIRBuilder);
2488
2489 auto TBL1 = MIRBuilder.buildInstr(
2490 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2491 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
2492 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
2493
Amara Emerson3739a202019-03-15 21:59:50 +00002494 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00002495 MIRBuilder
2496 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2497 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00002498 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
2499 I.eraseFromParent();
2500 return true;
2501 }
2502
Amara Emerson1abe05c2019-02-21 20:20:16 +00002503 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2504 // Q registers for regalloc.
2505 auto RegSeq = MIRBuilder
2506 .buildInstr(TargetOpcode::REG_SEQUENCE,
2507 {&AArch64::QQRegClass}, {Src1Reg})
2508 .addImm(AArch64::qsub0)
2509 .addUse(Src2Reg)
2510 .addImm(AArch64::qsub1);
2511
2512 auto TBL2 =
2513 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
2514 {RegSeq, IndexLoad->getOperand(0).getReg()});
2515 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
2516 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
2517 I.eraseFromParent();
2518 return true;
2519}
2520
Jessica Paquette16d67a32019-03-13 23:22:23 +00002521MachineInstr *AArch64InstructionSelector::emitLaneInsert(
2522 Optional<unsigned> DstReg, unsigned SrcReg, unsigned EltReg,
2523 unsigned LaneIdx, const RegisterBank &RB,
2524 MachineIRBuilder &MIRBuilder) const {
2525 MachineInstr *InsElt = nullptr;
2526 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
2527 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2528
2529 // Create a register to define with the insert if one wasn't passed in.
2530 if (!DstReg)
2531 DstReg = MRI.createVirtualRegister(DstRC);
2532
2533 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
2534 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
2535
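  // FPR sources use the lane form of INS, which takes its element from another
  // vector register, so widen the element with emitScalarToVector first. GPR
  // sources can feed the gpr form of INS directly.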
2536 if (RB.getID() == AArch64::FPRRegBankID) {
2537 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
2538 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2539 .addImm(LaneIdx)
2540 .addUse(InsSub->getOperand(0).getReg())
2541 .addImm(0);
2542 } else {
2543 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
2544 .addImm(LaneIdx)
2545 .addUse(EltReg);
2546 }
2547
2548 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
2549 return InsElt;
2550}
2551
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002552bool AArch64InstructionSelector::selectInsertElt(
2553 MachineInstr &I, MachineRegisterInfo &MRI) const {
2554 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
2555
2556 // Get information on the destination.
2557 unsigned DstReg = I.getOperand(0).getReg();
2558 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002559 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002560
2561 // Get information on the element we want to insert into the destination.
2562 unsigned EltReg = I.getOperand(2).getReg();
2563 const LLT EltTy = MRI.getType(EltReg);
2564 unsigned EltSize = EltTy.getSizeInBits();
2565 if (EltSize < 16 || EltSize > 64)
2566 return false; // Don't support all element types yet.
2567
2568 // Find the definition of the index. Bail out if it's not defined by a
2569 // G_CONSTANT.
2570 unsigned IdxReg = I.getOperand(3).getReg();
2571 unsigned LaneIdx = 0;
2572 if (!getConstantValueForReg(IdxReg, MRI, LaneIdx))
2573 return false;
2574
2575 // Perform the lane insert.
2576 unsigned SrcReg = I.getOperand(1).getReg();
2577 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
2578 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00002579
2580 if (VecSize < 128) {
2581 // If the vector we're inserting into is smaller than 128 bits, widen it
2582 // to 128 to do the insert.
2583 MachineInstr *ScalarToVec = emitScalarToVector(
2584 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
2585 if (!ScalarToVec)
2586 return false;
2587 SrcReg = ScalarToVec->getOperand(0).getReg();
2588 }
2589
2590 // Create an insert into a new FPR128 register.
2591 // Note that if our vector is already 128 bits, we end up emitting an extra
2592 // register.
2593 MachineInstr *InsMI =
2594 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
2595
2596 if (VecSize < 128) {
2597 // If we had to widen to perform the insert, then we have to demote back to
2598 // the original size to get the result we want.
2599 unsigned DemoteVec = InsMI->getOperand(0).getReg();
2600 const TargetRegisterClass *RC =
2601 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
2602 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2603 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2604 return false;
2605 }
2606 unsigned SubReg = 0;
2607 if (!getSubRegForClass(RC, TRI, SubReg))
2608 return false;
2609 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2610 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
2611                         << ")\n");
2612 return false;
2613 }
2614 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2615 .addReg(DemoteVec, 0, SubReg);
2616 RBI.constrainGenericRegister(DstReg, *RC, MRI);
2617 } else {
2618 // No widening needed.
2619 InsMI->getOperand(0).setReg(DstReg);
2620 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2621 }
2622
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002623 I.eraseFromParent();
2624 return true;
2625}
2626
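/// Select a G_BUILD_VECTOR by converting the first element to a vector with
/// emitScalarToVector and then inserting the remaining elements lane by lane.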
Amara Emerson5ec14602018-12-10 18:44:58 +00002627bool AArch64InstructionSelector::selectBuildVector(
2628 MachineInstr &I, MachineRegisterInfo &MRI) const {
2629 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
2630 // Until we port more of the optimized selections, for now just use a vector
2631 // insert sequence.
2632 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2633 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
2634 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00002635 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00002636 return false; // Don't support all element types yet.
2637 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002638 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00002639
2640 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002641 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00002642 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
2643 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002644 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00002645 return false;
2646
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002647 unsigned DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002648 unsigned DstSize = DstTy.getSizeInBits();
2649
2650 // Keep track of the last MI we inserted. Later on, we might be able to save
2651 // a copy using it.
2652 MachineInstr *PrevMI = nullptr;
2653 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00002654 // Note that if we don't do a subregister copy, we can end up making an
2655 // extra register.
2656 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
2657 MIRBuilder);
2658 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00002659 }
Jessica Paquette245047d2019-01-24 22:00:41 +00002660
2661 // If DstTy's size in bits is less than 128, then emit a subregister copy
2662 // from DstVec to the last register we've defined.
2663 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00002664 // Force this to be FPR using the destination vector.
2665 const TargetRegisterClass *RC =
2666 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00002667 if (!RC)
2668 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00002669 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
2670 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
2671 return false;
2672 }
2673
2674 unsigned SubReg = 0;
2675 if (!getSubRegForClass(RC, TRI, SubReg))
2676 return false;
2677 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
2678 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
2679                        << ")\n");
2680 return false;
2681 }
Jessica Paquette245047d2019-01-24 22:00:41 +00002682
2683 unsigned Reg = MRI.createVirtualRegister(RC);
2684 unsigned DstReg = I.getOperand(0).getReg();
2685
Amara Emerson86271782019-03-18 19:20:10 +00002686 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2687 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00002688 MachineOperand &RegOp = I.getOperand(1);
2689 RegOp.setReg(Reg);
2690 RBI.constrainGenericRegister(DstReg, *RC, MRI);
2691 } else {
2692 // We don't need a subregister copy. Save a copy by re-using the
2693 // destination register on the final insert.
2694 assert(PrevMI && "PrevMI was null?");
2695 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
2696 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
2697 }
2698
Amara Emerson5ec14602018-12-10 18:44:58 +00002699 I.eraseFromParent();
2700 return true;
2701}
2702
Jessica Paquette22c62152019-04-02 19:57:26 +00002703/// Helper function to pick the correct STLXR opcode for an llvm.aarch64.stlxr
2704/// intrinsic, based on the number of bytes to be stored.
2705static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
2706 switch (NumBytesToStore) {
2707 // TODO: 1, 2, and 4 byte stores.
2708 case 8:
2709 return AArch64::STLXRX;
2710 default:
2711 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
2712 << NumBytesToStore << ")\n");
2713 break;
2714 }
2715 return 0;
2716}
2717
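/// Select a G_INTRINSIC_W_SIDE_EFFECTS instruction. Currently handles
/// llvm.trap and the 64-bit form of llvm.aarch64.stlxr.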
2718bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
2719 MachineInstr &I, MachineRegisterInfo &MRI) const {
2720 // Find the intrinsic ID.
2721 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
2722 return Op.isIntrinsicID();
2723 });
2724 if (IntrinOp == I.operands_end())
2725 return false;
2726 unsigned IntrinID = IntrinOp->getIntrinsicID();
2727 MachineIRBuilder MIRBuilder(I);
2728
2729 // Select the instruction.
2730 switch (IntrinID) {
2731 default:
2732 return false;
2733 case Intrinsic::trap:
2734 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
2735 break;
2736 case Intrinsic::aarch64_stlxr:
2737 unsigned StatReg = I.getOperand(0).getReg();
2738 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
2739 "Status register must be 32 bits!");
2740 unsigned SrcReg = I.getOperand(2).getReg();
2741
2742 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
2743 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
2744 return false;
2745 }
2746
2747 unsigned PtrReg = I.getOperand(3).getReg();
2748 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
2749
2750 // Expect only one memory operand.
2751 if (!I.hasOneMemOperand())
2752 return false;
2753
2754 const MachineMemOperand *MemOp = *I.memoperands_begin();
2755 unsigned NumBytesToStore = MemOp->getSize();
2756 unsigned Opc = getStlxrOpcode(NumBytesToStore);
2757 if (!Opc)
2758 return false;
2759
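    // STLXR writes the exclusive-store status to StatReg and stores SrcReg to
    // the address in PtrReg.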
2760 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
2761 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
2762 }
2763
2764 I.eraseFromParent();
2765 return true;
2766}
2767
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002768/// SelectArithImmed - Select an immediate value that can be represented as
2769/// a 12-bit value shifted left by either 0 or 12. If so, return renderer
2770/// functions that add the 12-bit value and the encoded shift as immediates.
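/// For example, 0x123000 is selected as immediate 0x123 with an LSL #12
/// shift, while 0x123001 cannot be encoded and None is returned.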
Daniel Sanders1e4569f2017-10-20 20:55:29 +00002771InstructionSelector::ComplexRendererFns
Daniel Sanders2deea182017-04-22 15:11:04 +00002772AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002773 MachineInstr &MI = *Root.getParent();
2774 MachineBasicBlock &MBB = *MI.getParent();
2775 MachineFunction &MF = *MBB.getParent();
2776 MachineRegisterInfo &MRI = MF.getRegInfo();
2777
2778 // This function is called from the addsub_shifted_imm ComplexPattern,
2779  // which lists [imm] as the list of opcodes it's interested in. However,
2780 // we still need to check whether the operand is actually an immediate
2781 // here because the ComplexPattern opcode list is only used in
2782 // root-level opcode matching.
2783 uint64_t Immed;
2784 if (Root.isImm())
2785 Immed = Root.getImm();
2786 else if (Root.isCImm())
2787 Immed = Root.getCImm()->getZExtValue();
2788 else if (Root.isReg()) {
2789 MachineInstr *Def = MRI.getVRegDef(Root.getReg());
2790 if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00002791 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00002792 MachineOperand &Op1 = Def->getOperand(1);
2793 if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00002794 return None;
Daniel Sanders0e642022017-03-16 18:04:50 +00002795 Immed = Op1.getCImm()->getZExtValue();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002796 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00002797 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002798
2799 unsigned ShiftAmt;
2800
2801 if (Immed >> 12 == 0) {
2802 ShiftAmt = 0;
2803 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
2804 ShiftAmt = 12;
2805 Immed = Immed >> 12;
2806 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00002807 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002808
2809 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00002810 return {{
2811 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
2812 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
2813 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002814}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00002815
Daniel Sandersea8711b2017-10-16 03:36:29 +00002816/// Select a "register plus unscaled signed 9-bit immediate" address. This
2817/// should only match when there is an offset that is not valid for a scaled
2818/// immediate addressing mode. The "Size" argument is the size in bytes of the
2819/// memory reference, which is needed here to know what is valid for a scaled
2820/// immediate.
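/// For example, a 4-byte access at [base + 3] cannot use the scaled form, but
/// the offset 3 fits the signed 9-bit range [-256, 255] and matches here.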
Daniel Sanders1e4569f2017-10-20 20:55:29 +00002821InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00002822AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
2823 unsigned Size) const {
2824 MachineRegisterInfo &MRI =
2825 Root.getParent()->getParent()->getParent()->getRegInfo();
2826
2827 if (!Root.isReg())
2828 return None;
2829
2830 if (!isBaseWithConstantOffset(Root, MRI))
2831 return None;
2832
2833 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
2834 if (!RootDef)
2835 return None;
2836
2837 MachineOperand &OffImm = RootDef->getOperand(2);
2838 if (!OffImm.isReg())
2839 return None;
2840 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
2841 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
2842 return None;
2843 int64_t RHSC;
2844 MachineOperand &RHSOp1 = RHS->getOperand(1);
2845 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
2846 return None;
2847 RHSC = RHSOp1.getCImm()->getSExtValue();
2848
2849 // If the offset is valid as a scaled immediate, don't match here.
2850 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
2851 return None;
2852 if (RHSC >= -256 && RHSC < 256) {
2853 MachineOperand &Base = RootDef->getOperand(1);
2854 return {{
2855 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
2856 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
2857 }};
2858 }
2859 return None;
2860}
2861
2862/// Select a "register plus scaled unsigned 12-bit immediate" address. The
2863/// "Size" argument is the size in bytes of the memory reference, which
2864/// determines the scale.
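/// For example, with a 4-byte access the byte offsets 0, 4, ..., 16380 are
/// representable and are encoded as the scaled immediates 0 to 4095.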
Daniel Sanders1e4569f2017-10-20 20:55:29 +00002865InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00002866AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
2867 unsigned Size) const {
2868 MachineRegisterInfo &MRI =
2869 Root.getParent()->getParent()->getParent()->getRegInfo();
2870
2871 if (!Root.isReg())
2872 return None;
2873
2874 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
2875 if (!RootDef)
2876 return None;
2877
2878 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
2879 return {{
2880 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
2881 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
2882 }};
2883 }
2884
2885 if (isBaseWithConstantOffset(Root, MRI)) {
2886 MachineOperand &LHS = RootDef->getOperand(1);
2887 MachineOperand &RHS = RootDef->getOperand(2);
2888 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
2889 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
2890 if (LHSDef && RHSDef) {
2891 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
2892 unsigned Scale = Log2_32(Size);
2893 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
2894 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00002895 return {{
2896 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
2897 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
2898 }};
2899
Daniel Sandersea8711b2017-10-16 03:36:29 +00002900 return {{
2901 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
2902 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
2903 }};
2904 }
2905 }
2906 }
2907
2908 // Before falling back to our general case, check if the unscaled
2909 // instructions can handle this. If so, that's preferable.
2910 if (selectAddrModeUnscaled(Root, Size).hasValue())
2911 return None;
2912
2913 return {{
2914 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
2915 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
2916 }};
2917}
2918
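/// Custom renderer that adds the value of a G_CONSTANT as an immediate
/// operand on the instruction being rendered.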
Volkan Kelesf7f25682018-01-16 18:44:05 +00002919void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
2920 const MachineInstr &MI) const {
2921 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2922 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
2923 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
2924 assert(CstVal && "Expected constant value");
2925 MIB.addImm(CstVal.getValue());
2926}
2927
Daniel Sanders0b5293f2017-04-06 09:49:34 +00002928namespace llvm {
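// Factory function used by the AArch64 target machine to create the
// GlobalISel instruction selector.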
2929InstructionSelector *
2930createAArch64InstructionSelector(const AArch64TargetMachine &TM,
2931 AArch64Subtarget &Subtarget,
2932 AArch64RegisterBankInfo &RBI) {
2933 return new AArch64InstructionSelector(TM, Subtarget, RBI);
2934}
2935}