//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.

  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
                                 SmallVectorImpl<Optional<int>> &Idxs) const;
  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a mean to setup initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a mean to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, unsigned SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

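/// Select a COPY (or a generic bitcast acting as one): determine register
/// classes for the source and destination, insert a subregister copy or
/// SUBREG_TO_REG when the banks and sizes differ, and constrain the result.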
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        unsigned PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

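/// Select the AArch64 opcode for an FP/integer conversion (G_SITOFP,
/// G_UITOFP, G_FPTOSI, G_FPTOUI) with scalar source and destination types.
/// \returns \p GenericOpc if the combination is unsupported.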
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

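/// Pick the CSEL/FCSEL variant for a G_SELECT based on the register bank and
/// type of the destination. Returns 0 for unsupported types.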
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

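/// Convert an integer comparison predicate into the equivalent AArch64
/// condition code.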
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

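/// Convert a floating-point comparison predicate into one or two AArch64
/// condition codes. Predicates such as FCMP_ONE and FCMP_UEQ need a second
/// condition, returned in \p CondCode2 (AArch64CC::AL when unused).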
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

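/// Select a conditional branch whose condition is produced by a G_ICMP: use
/// CBZ/CBNZ for equality comparisons against zero, otherwise emit an integer
/// compare followed by a Bcc.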
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    MachineIRBuilder MIB(I);
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

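/// Select a vector G_SHL using the USHL instruction. Only <4 x s32> and
/// <2 x s32> are currently handled.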
bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

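/// Select a vector G_ASHR. Only <4 x s32> and <2 x s32> are currently
/// handled; the shift amount is negated and fed to SSHL (see the comment
/// below).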
bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify a
  // right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

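/// Lower G_VASTART for Darwin: compute the address of the variable argument
/// area from its frame index and store it to the va_list pointer.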
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

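/// Materialize the address of a GlobalValue or BlockAddress as a MOVZ of the
/// low 16 bits followed by MOVKs for the remaining 16-bit chunks.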
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that assume
    // the immediates are s64s. However, if the shifted type is 32 bits and for
    // some reason we receive input GMIR that has an s64 shift amount that's not
    // a G_CONSTANT, insert a truncate so that we can still select the s32
    // register-register variant.
    unsigned SrcReg = I.getOperand(1).getReg();
    unsigned ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return;
  }
  case TargetOpcode::G_STORE:
    contractCrossBankCopyIntoStore(I, MRI);
    return;
  default:
    return;
  }
}

bool AArch64InstructionSelector::earlySelectSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  MachineIRBuilder MIB(I);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}

void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!Def)
    return;
  Register DefDstReg = Def->getOperand(0).getReg();
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
}

Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001223bool AArch64InstructionSelector::earlySelectLoad(
1224 MachineInstr &I, MachineRegisterInfo &MRI) const {
1225  // Try to fold shifts, etc. into the addressing mode of a load.
1226 assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
1227
1228 // Don't handle atomic loads/stores yet.
1229 auto &MemOp = **I.memoperands_begin();
1230 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
1231 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1232 return false;
1233 }
1234
1235 unsigned MemBytes = MemOp.getSize();
1236
1237 // Only support 64-bit loads for now.
1238 if (MemBytes != 8)
1239 return false;
1240
1241 Register DstReg = I.getOperand(0).getReg();
1242 const LLT DstTy = MRI.getType(DstReg);
1243 // Don't handle vectors.
1244 if (DstTy.isVector())
1245 return false;
1246
1247 unsigned DstSize = DstTy.getSizeInBits();
1248 // TODO: 32-bit destinations.
1249 if (DstSize != 64)
1250 return false;
1251
Jessica Paquette2b404d02019-07-23 16:09:42 +00001252 // Check if we can do any folding from GEPs/shifts etc. into the load.
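  // For example, an address computed as a base plus a register offset:
  //   %addr:gpr(p0) = G_GEP %base, %off
  //   %val:gpr(s64) = G_LOAD %addr
  // can be selected as a single register-offset load, e.g. ldr x_val, [x_base, x_off].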
1253 auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001254 if (!ImmFn)
1255 return false;
1256
1257 // We can fold something. Emit the load here.
1258 MachineIRBuilder MIB(I);
1259
1260  // Choose the instruction based on the size of the element being loaded, and
1261  // whether or not we're loading into an FPR.
1262 const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
1263 unsigned Opc =
1264 RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
1265 // Construct the load.
1266 auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
1267 for (auto &RenderFn : *ImmFn)
1268 RenderFn(LoadMI);
1269 LoadMI.addMemOperand(*I.memoperands_begin());
1270 I.eraseFromParent();
1271 return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
1272}
1273
Amara Emersoncac11512019-07-03 01:49:06 +00001274bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1275 assert(I.getParent() && "Instruction should be in a basic block!");
1276 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1277
1278 MachineBasicBlock &MBB = *I.getParent();
1279 MachineFunction &MF = *MBB.getParent();
1280 MachineRegisterInfo &MRI = MF.getRegInfo();
1281
1282 switch (I.getOpcode()) {
1283 case TargetOpcode::G_SHL:
1284 return earlySelectSHL(I, MRI);
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001285 case TargetOpcode::G_LOAD:
1286 return earlySelectLoad(I, MRI);
Tim Northoverde98e922019-08-06 09:18:41 +00001287 case TargetOpcode::G_CONSTANT: {
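    // Only the zero constant is handled here: rewrite it as a copy of the
    // zero register (WZR/XZR) instead of materializing it with a mov.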
1288 bool IsZero = false;
1289 if (I.getOperand(1).isCImm())
1290 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1291 else if (I.getOperand(1).isImm())
1292 IsZero = I.getOperand(1).getImm() == 0;
1293
1294 if (!IsZero)
1295 return false;
1296
1297 Register DefReg = I.getOperand(0).getReg();
1298 LLT Ty = MRI.getType(DefReg);
1299 assert((Ty == LLT::scalar(64) || Ty == LLT::scalar(32)) &&
1300 "Unexpected legal constant type");
1301
1302 if (Ty == LLT::scalar(64)) {
1303 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1304 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1305 } else {
1306 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1307 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1308 }
1309 I.setDesc(TII.get(TargetOpcode::COPY));
1310 return true;
1311 }
Amara Emersoncac11512019-07-03 01:49:06 +00001312 default:
1313 return false;
1314 }
1315}
1316
Daniel Sandersf76f3152017-11-16 00:46:35 +00001317bool AArch64InstructionSelector::select(MachineInstr &I,
1318 CodeGenCoverage &CoverageInfo) const {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001319 assert(I.getParent() && "Instruction should be in a basic block!");
1320 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1321
1322 MachineBasicBlock &MBB = *I.getParent();
1323 MachineFunction &MF = *MBB.getParent();
1324 MachineRegisterInfo &MRI = MF.getRegInfo();
1325
Tim Northovercdf23f12016-10-31 18:30:59 +00001326 unsigned Opcode = I.getOpcode();
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001327 // G_PHI requires same handling as PHI
1328 if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
Tim Northovercdf23f12016-10-31 18:30:59 +00001329 // Certain non-generic instructions also need some special handling.
1330
1331 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1332 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001333
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001334 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001335 const Register DefReg = I.getOperand(0).getReg();
Tim Northover7d88da62016-11-08 00:34:06 +00001336 const LLT DefTy = MRI.getType(DefReg);
1337
Matt Arsenault732149b2019-07-01 17:02:24 +00001338 const RegClassOrRegBank &RegClassOrBank =
1339 MRI.getRegClassOrRegBank(DefReg);
Tim Northover7d88da62016-11-08 00:34:06 +00001340
Matt Arsenault732149b2019-07-01 17:02:24 +00001341 const TargetRegisterClass *DefRC
1342 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1343 if (!DefRC) {
1344 if (!DefTy.isValid()) {
1345 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1346 return false;
1347 }
1348 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1349 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001350 if (!DefRC) {
Matt Arsenault732149b2019-07-01 17:02:24 +00001351 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1352 return false;
Tim Northover7d88da62016-11-08 00:34:06 +00001353 }
1354 }
Matt Arsenault732149b2019-07-01 17:02:24 +00001355
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001356 I.setDesc(TII.get(TargetOpcode::PHI));
Tim Northover7d88da62016-11-08 00:34:06 +00001357
1358 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1359 }
1360
1361 if (I.isCopy())
Tim Northovercdf23f12016-10-31 18:30:59 +00001362 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001363
1364 return true;
Tim Northovercdf23f12016-10-31 18:30:59 +00001365 }
1366
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001367
1368 if (I.getNumOperands() != I.getNumExplicitOperands()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001369 LLVM_DEBUG(
1370 dbgs() << "Generic instruction has unexpected implicit operands\n");
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001371 return false;
1372 }
1373
Amara Emersoncac11512019-07-03 01:49:06 +00001374 // Try to do some lowering before we start instruction selecting. These
1375 // lowerings are purely transformations on the input G_MIR and so selection
1376 // must continue after any modification of the instruction.
1377 preISelLower(I);
1378
1379  // There may be patterns that the importer can't deal with optimally, but it
1380  // still selects them into a suboptimal sequence, so our custom C++ selection
1381  // code later never has a chance to work on them. Therefore, we have an early
1382 // selection attempt here to give priority to certain selection routines
1383 // over the imported ones.
1384 if (earlySelect(I))
1385 return true;
1386
Daniel Sandersf76f3152017-11-16 00:46:35 +00001387 if (selectImpl(I, CoverageInfo))
Ahmed Bougacha36f70352016-12-21 23:26:20 +00001388 return true;
1389
Tim Northover32a078a2016-09-15 10:09:59 +00001390 LLT Ty =
1391 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001392
Amara Emerson3739a202019-03-15 21:59:50 +00001393 MachineIRBuilder MIB(I);
1394
Tim Northover69271c62016-10-12 22:49:11 +00001395 switch (Opcode) {
Tim Northover5e3dbf32016-10-12 22:49:01 +00001396 case TargetOpcode::G_BRCOND: {
1397 if (Ty.getSizeInBits() > 32) {
1398 // We shouldn't need this on AArch64, but it would be implemented as an
1399 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1400 // bit being tested is < 32.
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001401 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1402                        << ", expected at most 32 bits\n");
Tim Northover5e3dbf32016-10-12 22:49:01 +00001403 return false;
1404 }
1405
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001406 const Register CondReg = I.getOperand(0).getReg();
Tim Northover5e3dbf32016-10-12 22:49:01 +00001407 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1408
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001409 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1410 // instructions will not be produced, as they are conditional branch
1411 // instructions that do not set flags.
1412 bool ProduceNonFlagSettingCondBr =
1413 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1414 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
Ahmed Bougacha641cb202017-03-27 16:35:31 +00001415 return true;
1416
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001417 if (ProduceNonFlagSettingCondBr) {
1418 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1419 .addUse(CondReg)
1420 .addImm(/*bit offset=*/0)
1421 .addMBB(DestMBB);
Tim Northover5e3dbf32016-10-12 22:49:01 +00001422
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001423 I.eraseFromParent();
1424 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1425 } else {
1426 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1427 .addDef(AArch64::WZR)
1428 .addUse(CondReg)
1429 .addImm(1);
1430 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1431 auto Bcc =
1432 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1433 .addImm(AArch64CC::EQ)
1434 .addMBB(DestMBB);
1435
1436 I.eraseFromParent();
1437 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1438 }
Tim Northover5e3dbf32016-10-12 22:49:01 +00001439 }
1440
Kristof Beyls65a12c02017-01-30 09:13:18 +00001441 case TargetOpcode::G_BRINDIRECT: {
1442 I.setDesc(TII.get(AArch64::BR));
1443 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1444 }
1445
Amara Emerson6e71b342019-06-21 18:10:41 +00001446 case TargetOpcode::G_BRJT:
1447 return selectBrJT(I, MRI);
1448
Jessica Paquette67ab9eb2019-04-26 18:00:01 +00001449 case TargetOpcode::G_BSWAP: {
1450 // Handle vector types for G_BSWAP directly.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001451 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette67ab9eb2019-04-26 18:00:01 +00001452 LLT DstTy = MRI.getType(DstReg);
1453
1454 // We should only get vector types here; everything else is handled by the
1455 // importer right now.
1456 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1457 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1458 return false;
1459 }
1460
1461 // Only handle 4 and 2 element vectors for now.
1462 // TODO: 16-bit elements.
1463 unsigned NumElts = DstTy.getNumElements();
1464 if (NumElts != 4 && NumElts != 2) {
1465 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1466 return false;
1467 }
1468
1469 // Choose the correct opcode for the supported types. Right now, that's
1470 // v2s32, v4s32, and v2s64.
1471 unsigned Opc = 0;
1472 unsigned EltSize = DstTy.getElementType().getSizeInBits();
1473 if (EltSize == 32)
1474 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1475 : AArch64::REV32v16i8;
1476 else if (EltSize == 64)
1477 Opc = AArch64::REV64v16i8;
1478
1479 // We should always get something by the time we get here...
1480 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1481
1482 I.setDesc(TII.get(Opc));
1483 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1484 }
1485
Tim Northover4494d692016-10-18 19:47:57 +00001486 case TargetOpcode::G_FCONSTANT:
Tim Northover4edc60d2016-10-10 21:49:42 +00001487 case TargetOpcode::G_CONSTANT: {
Tim Northover4494d692016-10-18 19:47:57 +00001488 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1489
Amara Emerson8f25a022019-06-21 16:43:50 +00001490 const LLT s8 = LLT::scalar(8);
1491 const LLT s16 = LLT::scalar(16);
Tim Northover4494d692016-10-18 19:47:57 +00001492 const LLT s32 = LLT::scalar(32);
1493 const LLT s64 = LLT::scalar(64);
1494 const LLT p0 = LLT::pointer(0, 64);
1495
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001496 const Register DefReg = I.getOperand(0).getReg();
Tim Northover4494d692016-10-18 19:47:57 +00001497 const LLT DefTy = MRI.getType(DefReg);
1498 const unsigned DefSize = DefTy.getSizeInBits();
1499 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1500
1501 // FIXME: Redundant check, but even less readable when factored out.
1502 if (isFP) {
1503 if (Ty != s32 && Ty != s64) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001504 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1505 << " constant, expected: " << s32 << " or " << s64
1506 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001507 return false;
1508 }
1509
1510 if (RB.getID() != AArch64::FPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001511 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1512 << " constant on bank: " << RB
1513 << ", expected: FPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001514 return false;
1515 }
Daniel Sanders11300ce2017-10-13 21:28:03 +00001516
1517 // The case when we have 0.0 is covered by tablegen. Reject it here so we
1518 // can be sure tablegen works correctly and isn't rescued by this code.
1519 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1520 return false;
Tim Northover4494d692016-10-18 19:47:57 +00001521 } else {
Daniel Sanders05540042017-08-08 10:44:31 +00001522 // s32 and s64 are covered by tablegen.
Amara Emerson8f25a022019-06-21 16:43:50 +00001523 if (Ty != p0 && Ty != s8 && Ty != s16) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001524 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1525 << " constant, expected: " << s32 << ", " << s64
1526 << ", or " << p0 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001527 return false;
1528 }
1529
1530 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001531 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1532 << " constant on bank: " << RB
1533 << ", expected: GPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001534 return false;
1535 }
1536 }
1537
Amara Emerson8f25a022019-06-21 16:43:50 +00001538 // We allow G_CONSTANT of types < 32b.
Tim Northover4494d692016-10-18 19:47:57 +00001539 const unsigned MovOpc =
Amara Emerson8f25a022019-06-21 16:43:50 +00001540 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
Tim Northover4494d692016-10-18 19:47:57 +00001541
Tim Northover4494d692016-10-18 19:47:57 +00001542 if (isFP) {
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001543      // Either emit an FMOV, or emit a copy and use a normal mov.
Tim Northover4494d692016-10-18 19:47:57 +00001544 const TargetRegisterClass &GPRRC =
1545 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1546 const TargetRegisterClass &FPRRC =
1547 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1548
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001549 // Can we use a FMOV instruction to represent the immediate?
1550 if (emitFMovForFConstant(I, MRI))
1551 return true;
1552
1553 // Nope. Emit a copy and use a normal mov instead.
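      // Roughly, this materializes the FP bit pattern into a GPR and then
      // copies it across to the FPR, e.g.:
      //   mov x8, #<bits>
      //   fmov d0, x8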
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001554 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
Tim Northover4494d692016-10-18 19:47:57 +00001555 MachineOperand &RegOp = I.getOperand(0);
1556 RegOp.setReg(DefGPRReg);
Amara Emerson3739a202019-03-15 21:59:50 +00001557 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1558 MIB.buildCopy({DefReg}, {DefGPRReg});
Tim Northover4494d692016-10-18 19:47:57 +00001559
1560 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001561 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
Tim Northover4494d692016-10-18 19:47:57 +00001562 return false;
1563 }
1564
1565 MachineOperand &ImmOp = I.getOperand(1);
1566 // FIXME: Is going through int64_t always correct?
1567 ImmOp.ChangeToImmediate(
1568 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001569 } else if (I.getOperand(1).isCImm()) {
Tim Northover9267ac52016-12-05 21:47:07 +00001570 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1571 I.getOperand(1).ChangeToImmediate(Val);
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001572 } else if (I.getOperand(1).isImm()) {
1573 uint64_t Val = I.getOperand(1).getImm();
1574 I.getOperand(1).ChangeToImmediate(Val);
Tim Northover4494d692016-10-18 19:47:57 +00001575 }
1576
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001577 I.setDesc(TII.get(MovOpc));
Tim Northover4494d692016-10-18 19:47:57 +00001578 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1579 return true;
Tim Northover4edc60d2016-10-10 21:49:42 +00001580 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001581 case TargetOpcode::G_EXTRACT: {
Amara Emerson511f7f52019-07-23 22:05:13 +00001582 Register DstReg = I.getOperand(0).getReg();
1583 Register SrcReg = I.getOperand(1).getReg();
1584 LLT SrcTy = MRI.getType(SrcReg);
1585 LLT DstTy = MRI.getType(DstReg);
Amara Emerson242efdb2018-02-18 17:28:34 +00001586 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001587 unsigned SrcSize = SrcTy.getSizeInBits();
Amara Emerson511f7f52019-07-23 22:05:13 +00001588
1589 if (SrcTy.getSizeInBits() > 64) {
1590 // This should be an extract of an s128, which is like a vector extract.
1591 if (SrcTy.getSizeInBits() != 128)
1592 return false;
1593 // Only support extracting 64 bits from an s128 at the moment.
1594 if (DstTy.getSizeInBits() != 64)
1595 return false;
1596
1597 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1598 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1599 // Check we have the right regbank always.
1600 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1601 DstRB.getID() == AArch64::FPRRegBankID &&
1602 "Wrong extract regbank!");
Fangrui Song305ace72019-07-24 01:59:44 +00001603 (void)SrcRB;
Amara Emerson511f7f52019-07-23 22:05:13 +00001604
1605 // Emit the same code as a vector extract.
1606 // Offset must be a multiple of 64.
1607 unsigned Offset = I.getOperand(2).getImm();
1608 if (Offset % 64 != 0)
1609 return false;
1610 unsigned LaneIdx = Offset / 64;
1611 MachineIRBuilder MIB(I);
1612 MachineInstr *Extract = emitExtractVectorElt(
1613 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1614 if (!Extract)
1615 return false;
1616 I.eraseFromParent();
1617 return true;
1618 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001619
Amara Emersonbc03bae2018-02-18 17:03:02 +00001620 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001621 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1622 Ty.getSizeInBits() - 1);
1623
Amara Emersonbc03bae2018-02-18 17:03:02 +00001624 if (SrcSize < 64) {
1625 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1626 "unexpected G_EXTRACT types");
1627 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1628 }
1629
Amara Emerson511f7f52019-07-23 22:05:13 +00001630 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001631 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001632 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1633 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001634 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1635 AArch64::GPR32RegClass, MRI);
1636 I.getOperand(0).setReg(DstReg);
1637
1638 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1639 }
1640
1641 case TargetOpcode::G_INSERT: {
1642 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001643 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1644 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001645 // Larger inserts are vectors, same-size ones should be something else by
1646 // now (split up or turned into COPYs).
1647 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1648 return false;
1649
Amara Emersonbc03bae2018-02-18 17:03:02 +00001650 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001651 unsigned LSB = I.getOperand(3).getImm();
1652 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001653 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001654 MachineInstrBuilder(MF, I).addImm(Width - 1);
1655
Amara Emersonbc03bae2018-02-18 17:03:02 +00001656 if (DstSize < 64) {
1657 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1658 "unexpected G_INSERT types");
1659 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1660 }
1661
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001662 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001663 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1664 TII.get(AArch64::SUBREG_TO_REG))
1665 .addDef(SrcReg)
1666 .addImm(0)
1667 .addUse(I.getOperand(2).getReg())
1668 .addImm(AArch64::sub_32);
1669 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1670 AArch64::GPR32RegClass, MRI);
1671 I.getOperand(2).setReg(SrcReg);
1672
1673 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1674 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001675 case TargetOpcode::G_FRAME_INDEX: {
1676 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001677 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001678 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1679 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001680 return false;
1681 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001682 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001683
1684 // MOs for a #0 shifted immediate.
1685 I.addOperand(MachineOperand::CreateImm(0));
1686 I.addOperand(MachineOperand::CreateImm(0));
1687
1688 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1689 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001690
1691 case TargetOpcode::G_GLOBAL_VALUE: {
1692 auto GV = I.getOperand(1).getGlobal();
1693 if (GV->isThreadLocal()) {
1694 // FIXME: we don't support TLS yet.
1695 return false;
1696 }
Peter Collingbourne33773d52019-07-31 20:14:09 +00001697 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001698 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001699 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001700 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001701 } else if (TM.getCodeModel() == CodeModel::Large) {
1702 // Materialize the global using movz/movk instructions.
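      // i.e. a MOVZ followed by MOVKs, each supplying 16 bits of the 64-bit
      // address.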
Amara Emerson1e8c1642018-07-31 00:09:02 +00001703 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001704 I.eraseFromParent();
1705 return true;
David Green9dd1d452018-08-22 11:31:39 +00001706 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1707 I.setDesc(TII.get(AArch64::ADR));
1708 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001709 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001710 I.setDesc(TII.get(AArch64::MOVaddr));
1711 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1712 MachineInstrBuilder MIB(MF, I);
1713 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1714 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1715 }
1716 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1717 }
1718
Amara Emersond3144a42019-06-06 07:58:37 +00001719 case TargetOpcode::G_ZEXTLOAD:
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001720 case TargetOpcode::G_LOAD:
1721 case TargetOpcode::G_STORE: {
Amara Emersond3144a42019-06-06 07:58:37 +00001722 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1723 MachineIRBuilder MIB(I);
1724
Tim Northover0f140c72016-09-09 11:46:34 +00001725 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001726
Tim Northover5ae83502016-09-15 09:20:34 +00001727 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001728 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1729 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001730 return false;
1731 }
1732
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001733 auto &MemOp = **I.memoperands_begin();
1734 if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001735 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001736 return false;
1737 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001738 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001739
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001740 const Register PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001741#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001742 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001743 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001744 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1745 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001746 assert(MRI.getType(PtrReg).isPointer() &&
1747 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001748#endif
1749
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001750 const Register ValReg = I.getOperand(0).getReg();
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001751 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1752
1753 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001754 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001755 if (NewOpc == I.getOpcode())
1756 return false;
1757
1758 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001759
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001760 uint64_t Offset = 0;
1761 auto *PtrMI = MRI.getVRegDef(PtrReg);
1762
1763 // Try to fold a GEP into our unsigned immediate addressing mode.
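    // For example, with an 8-byte load:
    //   %addr:gpr(p0) = G_GEP %base, 16
    //   %val:gpr(s64) = G_LOAD %addr
    // can be selected as ldr x_val, [x_base, #16]; the encoded immediate is
    // the byte offset scaled down by the access size.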
1764 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1765 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1766 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001767 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001768 const unsigned Scale = Log2_32(Size);
1769 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
1770 unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
1771 I.getOperand(1).setReg(Ptr2Reg);
1772 PtrMI = MRI.getVRegDef(Ptr2Reg);
1773 Offset = Imm / Size;
1774 }
1775 }
1776 }
1777
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001778 // If we haven't folded anything into our addressing mode yet, try to fold
1779 // a frame index into the base+offset.
1780 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1781 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1782
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001783 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001784
1785 // If we're storing a 0, use WZR/XZR.
1786 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1787 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1788 if (I.getOpcode() == AArch64::STRWui)
1789 I.getOperand(0).setReg(AArch64::WZR);
1790 else if (I.getOpcode() == AArch64::STRXui)
1791 I.getOperand(0).setReg(AArch64::XZR);
1792 }
1793 }
1794
Amara Emersond3144a42019-06-06 07:58:37 +00001795 if (IsZExtLoad) {
1796 // The zextload from a smaller type to i32 should be handled by the importer.
1797 if (MRI.getType(ValReg).getSizeInBits() != 64)
1798 return false;
1799 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1800      // and zero-extend with SUBREG_TO_REG.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001801 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1802 Register DstReg = I.getOperand(0).getReg();
Amara Emersond3144a42019-06-06 07:58:37 +00001803 I.getOperand(0).setReg(LdReg);
1804
1805 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1806 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1807 .addImm(0)
1808 .addUse(LdReg)
1809 .addImm(AArch64::sub_32);
1810 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1811 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1812 MRI);
1813 }
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001814 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1815 }
1816
Tim Northover9dd78f82017-02-08 21:22:25 +00001817 case TargetOpcode::G_SMULH:
1818 case TargetOpcode::G_UMULH: {
1819 // Reject the various things we don't support yet.
1820 if (unsupportedBinOp(I, RBI, MRI, TRI))
1821 return false;
1822
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001823 const Register DefReg = I.getOperand(0).getReg();
Tim Northover9dd78f82017-02-08 21:22:25 +00001824 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1825
1826 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001827 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001828 return false;
1829 }
1830
1831 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001832 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1833 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001834 return false;
1835 }
1836
1837 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1838 : AArch64::UMULHrr;
1839 I.setDesc(TII.get(NewOpc));
1840
1841 // Now that we selected an opcode, we need to constrain the register
1842 // operands to use appropriate classes.
1843 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1844 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001845 case TargetOpcode::G_FADD:
1846 case TargetOpcode::G_FSUB:
1847 case TargetOpcode::G_FMUL:
1848 case TargetOpcode::G_FDIV:
1849
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001850 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001851 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1852 return selectVectorASHR(I, MRI);
1853 LLVM_FALLTHROUGH;
1854 case TargetOpcode::G_SHL:
1855 if (Opcode == TargetOpcode::G_SHL &&
1856 MRI.getType(I.getOperand(0).getReg()).isVector())
1857 return selectVectorSHL(I, MRI);
1858 LLVM_FALLTHROUGH;
1859 case TargetOpcode::G_OR:
Jessica Paquette728b18f2019-07-24 23:11:01 +00001860 case TargetOpcode::G_LSHR: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001861 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001862 if (unsupportedBinOp(I, RBI, MRI, TRI))
1863 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001864
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001865 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001866
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001867 const Register DefReg = I.getOperand(0).getReg();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001868 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1869
1870 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1871 if (NewOpc == I.getOpcode())
1872 return false;
1873
1874 I.setDesc(TII.get(NewOpc));
1875 // FIXME: Should the type be always reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001876
1877 // Now that we selected an opcode, we need to constrain the register
1878 // operands to use appropriate classes.
1879 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1880 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001881
Jessica Paquette728b18f2019-07-24 23:11:01 +00001882 case TargetOpcode::G_GEP: {
1883 MachineIRBuilder MIRBuilder(I);
1884 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1885 MIRBuilder);
1886 I.eraseFromParent();
1887 return true;
1888 }
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001889 case TargetOpcode::G_UADDO: {
1890 // TODO: Support other types.
1891 unsigned OpSize = Ty.getSizeInBits();
1892 if (OpSize != 32 && OpSize != 64) {
1893 LLVM_DEBUG(
1894 dbgs()
1895           << "G_UADDO currently only supported for 32 and 64 bit types.\n");
1896 return false;
1897 }
1898
1899 // TODO: Support vectors.
1900 if (Ty.isVector()) {
1901 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1902 return false;
1903 }
1904
1905    // Add the operands and set the condition flags.
1906 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1907 MachineIRBuilder MIRBuilder(I);
1908 auto AddsMI = MIRBuilder.buildInstr(
1909 AddsOpc, {I.getOperand(0).getReg()},
1910 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1911 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1912
1913 // Now, put the overflow result in the register given by the first operand
1914 // to the G_UADDO. CSINC increments the result when the predicate is false,
1915 // so to get the increment when it's true, we need to use the inverse. In
1916 // this case, we want to increment when carry is set.
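    // For a 32-bit G_UADDO the emitted sequence is effectively:
    //   adds w_sum, w_lhs, w_rhs
    //   cset w_carry, hs   (an alias of csinc w_carry, wzr, wzr, lo)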
1917 auto CsetMI = MIRBuilder
1918 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001919 {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001920 .addImm(getInvertedCondCode(AArch64CC::HS));
1921 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1922 I.eraseFromParent();
1923 return true;
1924 }
1925
Tim Northover398c5f52017-02-14 20:56:29 +00001926 case TargetOpcode::G_PTR_MASK: {
1927 uint64_t Align = I.getOperand(2).getImm();
1928 if (Align >= 64 || Align == 0)
1929 return false;
1930
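    // Clear the low 'Align' bits of the pointer, e.g. for Align == 4:
    //   and x_dst, x_src, #0xfffffffffffffff0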
1931 uint64_t Mask = ~((1ULL << Align) - 1);
1932 I.setDesc(TII.get(AArch64::ANDXri));
1933 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1934
1935 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1936 }
Tim Northover037af52c2016-10-31 18:31:09 +00001937 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001938 case TargetOpcode::G_TRUNC: {
1939 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1940 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1941
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001942 const Register DstReg = I.getOperand(0).getReg();
1943 const Register SrcReg = I.getOperand(1).getReg();
Tim Northoverfb8d9892016-10-12 22:49:15 +00001944
1945 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1946 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1947
1948 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001949 LLVM_DEBUG(
1950 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001951 return false;
1952 }
1953
1954 if (DstRB.getID() == AArch64::GPRRegBankID) {
1955 const TargetRegisterClass *DstRC =
1956 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1957 if (!DstRC)
1958 return false;
1959
1960 const TargetRegisterClass *SrcRC =
1961 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1962 if (!SrcRC)
1963 return false;
1964
1965 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1966 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001967 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001968 return false;
1969 }
1970
1971 if (DstRC == SrcRC) {
1972 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00001973 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
1974 SrcTy == LLT::scalar(64)) {
1975 llvm_unreachable("TableGen can import this case");
1976 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00001977 } else if (DstRC == &AArch64::GPR32RegClass &&
1978 SrcRC == &AArch64::GPR64RegClass) {
1979 I.getOperand(1).setSubReg(AArch64::sub_32);
1980 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001981 LLVM_DEBUG(
1982 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001983 return false;
1984 }
1985
1986 I.setDesc(TII.get(TargetOpcode::COPY));
1987 return true;
1988 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
1989 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
1990 I.setDesc(TII.get(AArch64::XTNv4i16));
1991 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1992 return true;
1993 }
Amara Emerson511f7f52019-07-23 22:05:13 +00001994
1995 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
1996 MachineIRBuilder MIB(I);
1997 MachineInstr *Extract = emitExtractVectorElt(
1998 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
1999 if (!Extract)
2000 return false;
2001 I.eraseFromParent();
2002 return true;
2003 }
Tim Northoverfb8d9892016-10-12 22:49:15 +00002004 }
2005
2006 return false;
2007 }
2008
Tim Northover3d38b3a2016-10-11 20:50:21 +00002009 case TargetOpcode::G_ANYEXT: {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002010 const Register DstReg = I.getOperand(0).getReg();
2011 const Register SrcReg = I.getOperand(1).getReg();
Tim Northover3d38b3a2016-10-11 20:50:21 +00002012
Quentin Colombetcb629a82016-10-12 03:57:49 +00002013 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2014 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002015 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2016 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00002017 return false;
2018 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00002019
Quentin Colombetcb629a82016-10-12 03:57:49 +00002020 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2021 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002022 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2023 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002024 return false;
2025 }
2026
2027 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2028
2029 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002030 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002031 return false;
2032 }
2033
Quentin Colombetcb629a82016-10-12 03:57:49 +00002034 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002035 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2036 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002037 return false;
2038 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00002039 // At this point G_ANYEXT is just like a plain COPY, but we need
2040    // to explicitly form the 64-bit value if needed.
2041 if (DstSize > 32) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002042 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
Quentin Colombetcb629a82016-10-12 03:57:49 +00002043 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2044 .addDef(ExtSrc)
2045 .addImm(0)
2046 .addUse(SrcReg)
2047 .addImm(AArch64::sub_32);
2048 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002049 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00002050 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002051 }
2052
2053 case TargetOpcode::G_ZEXT:
2054 case TargetOpcode::G_SEXT: {
2055 unsigned Opcode = I.getOpcode();
Amara Emersonc07fe302019-07-26 00:01:09 +00002056 const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002057 const Register DefReg = I.getOperand(0).getReg();
2058 const Register SrcReg = I.getOperand(1).getReg();
Amara Emersonc07fe302019-07-26 00:01:09 +00002059 const LLT DstTy = MRI.getType(DefReg);
2060 const LLT SrcTy = MRI.getType(SrcReg);
2061 unsigned DstSize = DstTy.getSizeInBits();
2062 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover3d38b3a2016-10-11 20:50:21 +00002063
Amara Emersonc07fe302019-07-26 00:01:09 +00002064 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2065 AArch64::GPRRegBankID &&
2066 "Unexpected ext regbank");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002067
Amara Emersonc07fe302019-07-26 00:01:09 +00002068 MachineIRBuilder MIB(I);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002069 MachineInstr *ExtI;
Amara Emersonc07fe302019-07-26 00:01:09 +00002070 if (DstTy.isVector())
2071 return false; // Should be handled by imported patterns.
2072
Amara Emerson73752ab2019-08-02 21:15:36 +00002073    // First, check if we're extending the result of a load whose destination
2074    // type is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
2075    // smallest GPR register on AArch64, and all narrower loads automatically
2076    // zero-extend the upper bits. E.g.
2077 // %v(s8) = G_LOAD %p, :: (load 1)
2078 // %v2(s32) = G_ZEXT %v(s8)
2079 if (!IsSigned) {
2080 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2081 if (LoadMI &&
2082 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2083 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2084 unsigned BytesLoaded = MemOp->getSize();
2085 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2086 return selectCopy(I, TII, MRI, TRI, RBI);
2087 }
2088 }
2089
Amara Emersonc07fe302019-07-26 00:01:09 +00002090 if (DstSize == 64) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00002091 // FIXME: Can we avoid manually doing this?
2092 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002093 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2094 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002095 return false;
2096 }
2097
Amara Emersonc07fe302019-07-26 00:01:09 +00002098 auto SubregToReg =
2099 MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2100 .addImm(0)
2101 .addUse(SrcReg)
2102 .addImm(AArch64::sub_32);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002103
Amara Emersonc07fe302019-07-26 00:01:09 +00002104 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2105 {DefReg}, {SubregToReg})
2106 .addImm(0)
2107 .addImm(SrcSize - 1);
2108 } else if (DstSize <= 32) {
2109 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2110 {DefReg}, {SrcReg})
2111 .addImm(0)
2112 .addImm(SrcSize - 1);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002113 } else {
2114 return false;
2115 }
2116
2117 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002118 I.eraseFromParent();
2119 return true;
2120 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00002121
Tim Northover69271c62016-10-12 22:49:11 +00002122 case TargetOpcode::G_SITOFP:
2123 case TargetOpcode::G_UITOFP:
2124 case TargetOpcode::G_FPTOSI:
2125 case TargetOpcode::G_FPTOUI: {
2126 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2127 SrcTy = MRI.getType(I.getOperand(1).getReg());
2128 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2129 if (NewOpc == Opcode)
2130 return false;
2131
2132 I.setDesc(TII.get(NewOpc));
2133 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2134
2135 return true;
2136 }
2137
2138
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00002139 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00002140 // The importer is currently unable to import pointer types since they
2141 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00002142 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00002143
Daniel Sandersedd07842017-08-17 09:26:14 +00002144 case TargetOpcode::G_BITCAST:
2145 // Imported SelectionDAG rules can handle every bitcast except those that
2146 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00002147 // but we might not run an optimizer that deletes them. The other exception
2148 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2149 // of them.
2150 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00002151
Tim Northover9ac0eba2016-11-08 00:45:29 +00002152 case TargetOpcode::G_SELECT: {
2153 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002154 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2155 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00002156 return false;
2157 }
2158
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002159 const Register CondReg = I.getOperand(1).getReg();
2160 const Register TReg = I.getOperand(2).getReg();
2161 const Register FReg = I.getOperand(3).getReg();
Tim Northover9ac0eba2016-11-08 00:45:29 +00002162
Jessica Paquette99316042019-07-02 19:44:16 +00002163 if (tryOptSelect(I))
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002164 return true;
Tim Northover9ac0eba2016-11-08 00:45:29 +00002165
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002166    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
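    // Test the low bit of the condition and select on NE, i.e. emit something
    // like:
    //   tst  w_cond, #0x1
    //   csel w_dst, w_true, w_false, ne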
Tim Northover9ac0eba2016-11-08 00:45:29 +00002167 MachineInstr &TstMI =
2168 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2169 .addDef(AArch64::WZR)
2170 .addUse(CondReg)
2171 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2172
2173 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2174 .addDef(I.getOperand(0).getReg())
2175 .addUse(TReg)
2176 .addUse(FReg)
2177 .addImm(AArch64CC::NE);
2178
2179 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2180 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2181
2182 I.eraseFromParent();
2183 return true;
2184 }
Tim Northover6c02ad52016-10-12 22:49:04 +00002185 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00002186 if (Ty.isVector())
2187 return selectVectorICmp(I, MRI);
2188
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00002189 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002190 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2191 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00002192 return false;
2193 }
2194
Jessica Paquette49537bb2019-06-17 18:40:06 +00002195 MachineIRBuilder MIRBuilder(I);
Jessica Paquette99316042019-07-02 19:44:16 +00002196 if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2197 MIRBuilder))
2198 return false;
Jessica Paquette49537bb2019-06-17 18:40:06 +00002199 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
Jessica Paquette99316042019-07-02 19:44:16 +00002200 MIRBuilder);
Tim Northover6c02ad52016-10-12 22:49:04 +00002201 I.eraseFromParent();
2202 return true;
2203 }
2204
Tim Northover7dd378d2016-10-12 22:49:07 +00002205 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00002206 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002207 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2208 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00002209 return false;
2210 }
2211
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002212 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2213 if (!CmpOpc)
Tim Northover7dd378d2016-10-12 22:49:07 +00002214 return false;
Tim Northover7dd378d2016-10-12 22:49:07 +00002215
2216 // FIXME: regbank
2217
2218 AArch64CC::CondCode CC1, CC2;
2219 changeFCMPPredToAArch64CC(
2220 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
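    // Some FP predicates (e.g. ONE, UEQ) need two AArch64 condition codes. In
    // that case CC2 != AL and the two CSINC results are ORed together below.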
2221
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002222 // Partially build the compare. Decide if we need to add a use for the
2223 // third operand based off whether or not we're comparing against 0.0.
2224 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2225 .addUse(I.getOperand(2).getReg());
2226
2227 // If we don't have an immediate compare, then we need to add a use of the
2228 // register which wasn't used for the immediate.
2229 // Note that the immediate will always be the last operand.
2230 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2231 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
Tim Northover7dd378d2016-10-12 22:49:07 +00002232
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002233 const Register DefReg = I.getOperand(0).getReg();
2234 Register Def1Reg = DefReg;
Tim Northover7dd378d2016-10-12 22:49:07 +00002235 if (CC2 != AArch64CC::AL)
2236 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2237
2238 MachineInstr &CSetMI =
2239 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2240 .addDef(Def1Reg)
2241 .addUse(AArch64::WZR)
2242 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00002243 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00002244
2245 if (CC2 != AArch64CC::AL) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002246 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Tim Northover7dd378d2016-10-12 22:49:07 +00002247 MachineInstr &CSet2MI =
2248 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2249 .addDef(Def2Reg)
2250 .addUse(AArch64::WZR)
2251 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00002252 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00002253 MachineInstr &OrMI =
2254 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2255 .addDef(DefReg)
2256 .addUse(Def1Reg)
2257 .addUse(Def2Reg);
2258 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2259 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2260 }
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002261 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover7dd378d2016-10-12 22:49:07 +00002262 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2263
2264 I.eraseFromParent();
2265 return true;
2266 }
Tim Northovere9600d82017-02-08 17:57:27 +00002267 case TargetOpcode::G_VASTART:
2268 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2269 : selectVaStartAAPCS(I, MF, MRI);
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00002270 case TargetOpcode::G_INTRINSIC:
2271 return selectIntrinsic(I, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00002272 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00002273 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002274 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00002275 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00002276 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002277 const Register DstReg = I.getOperand(0).getReg();
Amara Emerson58aea522018-02-02 01:44:43 +00002278 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2279 const TargetRegisterClass *DstRC =
2280 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2281 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00002282 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002283 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00002284 case TargetOpcode::G_BLOCK_ADDR: {
2285 if (TM.getCodeModel() == CodeModel::Large) {
2286 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2287 I.eraseFromParent();
2288 return true;
2289 } else {
2290 I.setDesc(TII.get(AArch64::MOVaddrBA));
2291 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2292 I.getOperand(0).getReg())
2293 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2294 /* Offset */ 0, AArch64II::MO_PAGE)
2295 .addBlockAddress(
2296 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2297 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2298 I.eraseFromParent();
2299 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2300 }
2301 }
Jessica Paquette991cb392019-04-23 20:46:19 +00002302 case TargetOpcode::G_INTRINSIC_TRUNC:
2303 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00002304 case TargetOpcode::G_INTRINSIC_ROUND:
2305 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00002306 case TargetOpcode::G_BUILD_VECTOR:
2307 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002308 case TargetOpcode::G_MERGE_VALUES:
2309 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002310 case TargetOpcode::G_UNMERGE_VALUES:
2311 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002312 case TargetOpcode::G_SHUFFLE_VECTOR:
2313 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00002314 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2315 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002316 case TargetOpcode::G_INSERT_VECTOR_ELT:
2317 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00002318 case TargetOpcode::G_CONCAT_VECTORS:
2319 return selectConcatVectors(I, MRI);
Amara Emerson6e71b342019-06-21 18:10:41 +00002320 case TargetOpcode::G_JUMP_TABLE:
2321 return selectJumpTable(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002322 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002323
2324 return false;
2325}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002326
Amara Emerson6e71b342019-06-21 18:10:41 +00002327bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2328 MachineRegisterInfo &MRI) const {
2329 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002330 Register JTAddr = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002331 unsigned JTI = I.getOperand(1).getIndex();
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002332 Register Index = I.getOperand(2).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002333 MachineIRBuilder MIB(I);
2334
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002335 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2336 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
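  // JumpTableDest32 is a pseudo that computes the branch target from the jump
  // table address and the index, using ScratchReg for the intermediate value.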
Amara Emerson6e71b342019-06-21 18:10:41 +00002337 MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2338 {JTAddr, Index})
2339 .addJumpTableIndex(JTI);
2340
2341 // Build the indirect branch.
2342 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2343 I.eraseFromParent();
2344 return true;
2345}
2346
2347bool AArch64InstructionSelector::selectJumpTable(
2348 MachineInstr &I, MachineRegisterInfo &MRI) const {
2349 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2350 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2351
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002352 Register DstReg = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002353 unsigned JTI = I.getOperand(1).getIndex();
2354 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
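  // e.g.:
  //   adrp x_dst, .LJTI0_0
  //   add  x_dst, x_dst, :lo12:.LJTI0_0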
2355 MachineIRBuilder MIB(I);
2356 auto MovMI =
2357 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2358 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2359 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2360 I.eraseFromParent();
2361 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2362}
2363
Jessica Paquette991cb392019-04-23 20:46:19 +00002364bool AArch64InstructionSelector::selectIntrinsicTrunc(
2365 MachineInstr &I, MachineRegisterInfo &MRI) const {
2366 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2367
2368 // Select the correct opcode.
2369 unsigned Opc = 0;
2370 if (!SrcTy.isVector()) {
2371 switch (SrcTy.getSizeInBits()) {
2372 default:
2373 case 16:
2374 Opc = AArch64::FRINTZHr;
2375 break;
2376 case 32:
2377 Opc = AArch64::FRINTZSr;
2378 break;
2379 case 64:
2380 Opc = AArch64::FRINTZDr;
2381 break;
2382 }
2383 } else {
2384 unsigned NumElts = SrcTy.getNumElements();
2385 switch (SrcTy.getElementType().getSizeInBits()) {
2386 default:
2387 break;
2388 case 16:
2389 if (NumElts == 4)
2390 Opc = AArch64::FRINTZv4f16;
2391 else if (NumElts == 8)
2392 Opc = AArch64::FRINTZv8f16;
2393 break;
2394 case 32:
2395 if (NumElts == 2)
2396 Opc = AArch64::FRINTZv2f32;
2397 else if (NumElts == 4)
2398 Opc = AArch64::FRINTZv4f32;
2399 break;
2400 case 64:
2401 if (NumElts == 2)
2402 Opc = AArch64::FRINTZv2f64;
2403 break;
2404 }
2405 }
2406
2407 if (!Opc) {
2408 // Didn't get an opcode above, bail.
2409 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2410 return false;
2411 }
2412
2413 // Legalization would have set us up perfectly for this; we just need to
2414 // set the opcode and move on.
2415 I.setDesc(TII.get(Opc));
2416 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2417}
2418
Jessica Paquette4fe75742019-04-23 23:03:03 +00002419bool AArch64InstructionSelector::selectIntrinsicRound(
2420 MachineInstr &I, MachineRegisterInfo &MRI) const {
2421 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2422
2423 // Select the correct opcode.
2424 unsigned Opc = 0;
2425 if (!SrcTy.isVector()) {
2426 switch (SrcTy.getSizeInBits()) {
2427 default:
2428 case 16:
2429 Opc = AArch64::FRINTAHr;
2430 break;
2431 case 32:
2432 Opc = AArch64::FRINTASr;
2433 break;
2434 case 64:
2435 Opc = AArch64::FRINTADr;
2436 break;
2437 }
2438 } else {
2439 unsigned NumElts = SrcTy.getNumElements();
2440 switch (SrcTy.getElementType().getSizeInBits()) {
2441 default:
2442 break;
2443 case 16:
2444 if (NumElts == 4)
2445 Opc = AArch64::FRINTAv4f16;
2446 else if (NumElts == 8)
2447 Opc = AArch64::FRINTAv8f16;
2448 break;
2449 case 32:
2450 if (NumElts == 2)
2451 Opc = AArch64::FRINTAv2f32;
2452 else if (NumElts == 4)
2453 Opc = AArch64::FRINTAv4f32;
2454 break;
2455 case 64:
2456 if (NumElts == 2)
2457 Opc = AArch64::FRINTAv2f64;
2458 break;
2459 }
2460 }
2461
2462 if (!Opc) {
2463 // Didn't get an opcode above, bail.
2464 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2465 return false;
2466 }
2467
2468 // Legalization would have set us up perfectly for this; we just need to
2469 // set the opcode and move on.
2470 I.setDesc(TII.get(Opc));
2471 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2472}
2473
Amara Emerson9bf092d2019-04-09 21:22:43 +00002474bool AArch64InstructionSelector::selectVectorICmp(
2475 MachineInstr &I, MachineRegisterInfo &MRI) const {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002476 Register DstReg = I.getOperand(0).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002477 LLT DstTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002478 Register SrcReg = I.getOperand(2).getReg();
2479 Register Src2Reg = I.getOperand(3).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002480 LLT SrcTy = MRI.getType(SrcReg);
2481
2482 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2483 unsigned NumElts = DstTy.getNumElements();
2484
2485 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2486 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2487 // Third index is cc opcode:
2488 // 0 == eq
2489 // 1 == ugt
2490 // 2 == uge
2491 // 3 == ult
2492 // 4 == ule
2493 // 5 == sgt
2494 // 6 == sge
2495 // 7 == slt
2496 // 8 == sle
2497 // ne is done by negating 'eq' result.
2498
2499  // The table below assumes that for some comparisons the operands will be
2500 // commuted.
2501 // ult op == commute + ugt op
2502 // ule op == commute + uge op
2503 // slt op == commute + sgt op
2504 // sle op == commute + sge op
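  // Worked example (a sketch): a signed-greater-than compare of <4 x s32>
  // operands gives EltIdx = 2 (32-bit elements), NumEltsIdx = 1 (v4) and
  // PredIdx = 5, which the table below maps to CMGTv4i32. An ICMP_NE uses
  // the 'eq' entry and the result is inverted with NOT afterwards.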
2505 unsigned PredIdx = 0;
2506 bool SwapOperands = false;
2507 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2508 switch (Pred) {
2509 case CmpInst::ICMP_NE:
2510 case CmpInst::ICMP_EQ:
2511 PredIdx = 0;
2512 break;
2513 case CmpInst::ICMP_UGT:
2514 PredIdx = 1;
2515 break;
2516 case CmpInst::ICMP_UGE:
2517 PredIdx = 2;
2518 break;
2519 case CmpInst::ICMP_ULT:
2520 PredIdx = 3;
2521 SwapOperands = true;
2522 break;
2523 case CmpInst::ICMP_ULE:
2524 PredIdx = 4;
2525 SwapOperands = true;
2526 break;
2527 case CmpInst::ICMP_SGT:
2528 PredIdx = 5;
2529 break;
2530 case CmpInst::ICMP_SGE:
2531 PredIdx = 6;
2532 break;
2533 case CmpInst::ICMP_SLT:
2534 PredIdx = 7;
2535 SwapOperands = true;
2536 break;
2537 case CmpInst::ICMP_SLE:
2538 PredIdx = 8;
2539 SwapOperands = true;
2540 break;
2541 default:
2542 llvm_unreachable("Unhandled icmp predicate");
2543 return false;
2544 }
2545
2546 // This table obviously should be tablegen'd when we have our GISel native
2547 // tablegen selector.
2548
2549 static const unsigned OpcTable[4][4][9] = {
2550 {
2551 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2552 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2553 0 /* invalid */},
2554 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2555 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2556 0 /* invalid */},
2557 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2558 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2559 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2560 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2561 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2562 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2563 },
2564 {
2565 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2566 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2567 0 /* invalid */},
2568 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2569 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2570 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2571 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2572 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2573 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2574 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2575 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2576 0 /* invalid */}
2577 },
2578 {
2579 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2580 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2581 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2582 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2583 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2584 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2585 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2586 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2587 0 /* invalid */},
2588 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2589 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2590 0 /* invalid */}
2591 },
2592 {
2593 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2594 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2595 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2596 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2597 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2598 0 /* invalid */},
2599 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2600 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2601 0 /* invalid */},
2602 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2603 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2604 0 /* invalid */}
2605 },
2606 };
2607 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2608 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2609 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2610 if (!Opc) {
2611 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2612 return false;
2613 }
2614
2615 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2616 const TargetRegisterClass *SrcRC =
2617 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2618 if (!SrcRC) {
2619 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2620 return false;
2621 }
2622
2623 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2624 if (SrcTy.getSizeInBits() == 128)
2625 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2626
2627 if (SwapOperands)
2628 std::swap(SrcReg, Src2Reg);
2629
2630 MachineIRBuilder MIB(I);
2631 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2632 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2633
2634 // Invert if we had a 'ne' cc.
2635 if (NotOpc) {
2636 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2637 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2638 } else {
2639 MIB.buildCopy(DstReg, Cmp.getReg(0));
2640 }
2641 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2642 I.eraseFromParent();
2643 return true;
2644}
2645
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002646MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002647 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002648 MachineIRBuilder &MIRBuilder) const {
2649 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002650
2651 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002652 auto Ins =
2653 MIRBuilder
2654 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2655 .addImm(SubregIndex);
2656 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2657 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2658 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002659 };
2660
Amara Emerson8acb0d92019-03-04 19:16:00 +00002661 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002662 case 16:
2663 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002664 case 32:
2665 return BuildFn(AArch64::ssub);
2666 case 64:
2667 return BuildFn(AArch64::dsub);
2668 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002669 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002670 }
2671}
2672
Amara Emerson8cb186c2018-12-20 01:11:04 +00002673bool AArch64InstructionSelector::selectMergeValues(
2674 MachineInstr &I, MachineRegisterInfo &MRI) const {
2675 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2676 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2677 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2678 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
Amara Emerson511f7f52019-07-23 22:05:13 +00002679 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002680
Amara Emerson8cb186c2018-12-20 01:11:04 +00002681 if (I.getNumOperands() != 3)
2682 return false;
Amara Emerson511f7f52019-07-23 22:05:13 +00002683
2684 // Merging 2 s64s into an s128.
2685 if (DstTy == LLT::scalar(128)) {
2686 if (SrcTy.getSizeInBits() != 64)
2687 return false;
2688 MachineIRBuilder MIB(I);
2689 Register DstReg = I.getOperand(0).getReg();
2690 Register Src1Reg = I.getOperand(1).getReg();
2691 Register Src2Reg = I.getOperand(2).getReg();
2692 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2693 MachineInstr *InsMI =
2694 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2695 if (!InsMI)
2696 return false;
2697 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2698 Src2Reg, /* LaneIdx */ 1, RB, MIB);
2699 if (!Ins2MI)
2700 return false;
2701 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2702 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2703 I.eraseFromParent();
2704 return true;
2705 }
2706
Amara Emerson8cb186c2018-12-20 01:11:04 +00002707 if (RB.getID() != AArch64::GPRRegBankID)
2708 return false;
2709
Amara Emerson511f7f52019-07-23 22:05:13 +00002710 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2711 return false;
2712
Amara Emerson8cb186c2018-12-20 01:11:04 +00002713 auto *DstRC = &AArch64::GPR64RegClass;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002714 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002715 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2716 TII.get(TargetOpcode::SUBREG_TO_REG))
2717 .addDef(SubToRegDef)
2718 .addImm(0)
2719 .addUse(I.getOperand(1).getReg())
2720 .addImm(AArch64::sub_32);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002721 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002722 // Need to anyext the second scalar before we can use bfm
2723 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2724 TII.get(TargetOpcode::SUBREG_TO_REG))
2725 .addDef(SubToRegDef2)
2726 .addImm(0)
2727 .addUse(I.getOperand(2).getReg())
2728 .addImm(AArch64::sub_32);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002729 MachineInstr &BFM =
2730 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002731 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002732 .addUse(SubToRegDef)
2733 .addUse(SubToRegDef2)
2734 .addImm(32)
2735 .addImm(31);
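  // The BFM above is the "bfi xDst, xHi, #32, #32" form (register names are
  // a sketch): the low 32 bits keep the first (least significant) operand
  // and bits [63:32] receive the second one, matching G_MERGE_VALUES'
  // low-to-high operand order.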
2736 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2737 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2738 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2739 I.eraseFromParent();
2740 return true;
2741}
2742
Jessica Paquette607774c2019-03-11 22:18:01 +00002743static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2744 const unsigned EltSize) {
2745 // Choose a lane copy opcode and subregister based off of the size of the
2746 // vector's elements.
2747 switch (EltSize) {
2748 case 16:
2749 CopyOpc = AArch64::CPYi16;
2750 ExtractSubReg = AArch64::hsub;
2751 break;
2752 case 32:
2753 CopyOpc = AArch64::CPYi32;
2754 ExtractSubReg = AArch64::ssub;
2755 break;
2756 case 64:
2757 CopyOpc = AArch64::CPYi64;
2758 ExtractSubReg = AArch64::dsub;
2759 break;
2760 default:
2761 // Unknown size, bail out.
2762 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2763 return false;
2764 }
2765 return true;
2766}
2767
Amara Emersond61b89b2019-03-14 22:48:18 +00002768MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002769 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2770 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
Amara Emersond61b89b2019-03-14 22:48:18 +00002771 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2772 unsigned CopyOpc = 0;
2773 unsigned ExtractSubReg = 0;
2774 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2775 LLVM_DEBUG(
2776 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2777 return nullptr;
2778 }
2779
2780 const TargetRegisterClass *DstRC =
2781 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2782 if (!DstRC) {
2783 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2784 return nullptr;
2785 }
2786
2787 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2788 const LLT &VecTy = MRI.getType(VecReg);
2789 const TargetRegisterClass *VecRC =
2790 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2791 if (!VecRC) {
2792 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2793 return nullptr;
2794 }
2795
2796 // The register that we're going to copy into.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002797 Register InsertReg = VecReg;
Amara Emersond61b89b2019-03-14 22:48:18 +00002798 if (!DstReg)
2799 DstReg = MRI.createVirtualRegister(DstRC);
2800 // If the lane index is 0, we just use a subregister COPY.
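  // E.g. extracting lane 0 of a <4 x s32> into an FPR32 is just an ssub
  // subregister copy, while lane 2 becomes a lane copy such as
  // "mov s0, v1.s[2]" (CPYi32) below; register names are a sketch.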
2801 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002802 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2803 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002804 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002805 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002806 }
2807
2808 // Lane copies require 128-bit wide registers. If we're dealing with an
2809 // unpacked vector, then we need to move up to that width. Insert an implicit
2810 // def and a subregister insert to get us there.
2811 if (VecTy.getSizeInBits() != 128) {
2812 MachineInstr *ScalarToVector = emitScalarToVector(
2813 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2814 if (!ScalarToVector)
2815 return nullptr;
2816 InsertReg = ScalarToVector->getOperand(0).getReg();
2817 }
2818
2819 MachineInstr *LaneCopyMI =
2820 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2821 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2822
2823 // Make sure that we actually constrain the initial copy.
2824 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2825 return LaneCopyMI;
2826}
2827
Jessica Paquette607774c2019-03-11 22:18:01 +00002828bool AArch64InstructionSelector::selectExtractElt(
2829 MachineInstr &I, MachineRegisterInfo &MRI) const {
2830 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2831 "unexpected opcode!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002832 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002833 const LLT NarrowTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002834 const Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002835 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002836 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002837 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2838 "source register size too small!");
2839 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2840
2841 // Need the lane index to determine the correct copy opcode.
2842 MachineOperand &LaneIdxOp = I.getOperand(2);
2843 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2844
2845 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2846 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2847 return false;
2848 }
2849
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002850 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002851 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2852 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002853 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002854 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002855
Jessica Paquette607774c2019-03-11 22:18:01 +00002856 MachineIRBuilder MIRBuilder(I);
2857
Amara Emersond61b89b2019-03-14 22:48:18 +00002858 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2859 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2860 LaneIdx, MIRBuilder);
2861 if (!Extract)
2862 return false;
2863
2864 I.eraseFromParent();
2865 return true;
2866}
2867
2868bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2869 MachineInstr &I, MachineRegisterInfo &MRI) const {
2870 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002871 Register SrcReg = I.getOperand(NumElts).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002872 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2873 const LLT SrcTy = MRI.getType(SrcReg);
2874
2875 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2876 if (SrcTy.getSizeInBits() > 128) {
2877 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2878 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002879 }
2880
Amara Emersond61b89b2019-03-14 22:48:18 +00002881 MachineIRBuilder MIB(I);
2882
2883 // We implement a split vector operation by treating the sub-vectors as
2884 // scalars and extracting them.
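  // E.g. unmerging a <4 x s32> into two <2 x s32> halves becomes an extract
  // of 64-bit "element" 0 (a dsub subregister copy) and of "element" 1
  // (a lane copy along the lines of "mov d1, v0.d[1]"); a sketch only.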
2885 const RegisterBank &DstRB =
2886 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2887 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002888 Register Dst = I.getOperand(OpIdx).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002889 MachineInstr *Extract =
2890 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2891 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002892 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002893 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002894 I.eraseFromParent();
2895 return true;
2896}
2897
Jessica Paquette245047d2019-01-24 22:00:41 +00002898bool AArch64InstructionSelector::selectUnmergeValues(
2899 MachineInstr &I, MachineRegisterInfo &MRI) const {
2900 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2901 "unexpected opcode");
2902
2903 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2904 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2905 AArch64::FPRRegBankID ||
2906 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2907 AArch64::FPRRegBankID) {
2908 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2909 "currently unsupported.\n");
2910 return false;
2911 }
2912
2913 // The last operand is the vector source register, and every other operand is
2914 // a register to unpack into.
2915 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002916 Register SrcReg = I.getOperand(NumElts).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002917 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2918 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002919 (void)WideTy;
Jessica Paquette245047d2019-01-24 22:00:41 +00002920 assert(WideTy.isVector() && "can only unmerge from vector types!");
2921 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2922 "source register size too small!");
2923
Amara Emersond61b89b2019-03-14 22:48:18 +00002924 if (!NarrowTy.isScalar())
2925 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002926
Amara Emerson3739a202019-03-15 21:59:50 +00002927 MachineIRBuilder MIB(I);
2928
Jessica Paquette245047d2019-01-24 22:00:41 +00002929 // Choose a lane copy opcode and subregister based off of the size of the
2930 // vector's elements.
2931 unsigned CopyOpc = 0;
2932 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002933 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002934 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002935
2936 // Set up for the lane copies.
2937 MachineBasicBlock &MBB = *I.getParent();
2938
2939 // Stores the registers we'll be copying from.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002940 SmallVector<Register, 4> InsertRegs;
Jessica Paquette245047d2019-01-24 22:00:41 +00002941
2942 // We'll use the first register twice, so we only need NumElts-1 registers.
2943 unsigned NumInsertRegs = NumElts - 1;
2944
2945 // If our elements fit into exactly 128 bits, then we can copy from the source
2946 // directly. Otherwise, we need to do a bit of setup with some subregister
2947 // inserts.
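  // E.g. a <2 x s64> source is already 128 bits wide, so the lane copies can
  // read it directly. A 64-bit <2 x s32> source is first widened with an
  // IMPLICIT_DEF + INSERT_SUBREG (dsub), since the CPYi* lane-copy pseudos
  // only read 128-bit registers. (A sketch of the intent.)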
2948 if (NarrowTy.getSizeInBits() * NumElts == 128) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002949 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00002950 } else {
2951 // No. We have to perform subregister inserts. For each insert, create an
2952 // implicit def and a subregister insert, and save the register we create.
2953 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002954 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00002955 MachineInstr &ImpDefMI =
2956 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
2957 ImpDefReg);
2958
2959 // Now, create the subregister insert from SrcReg.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002960 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00002961 MachineInstr &InsMI =
2962 *BuildMI(MBB, I, I.getDebugLoc(),
2963 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
2964 .addUse(ImpDefReg)
2965 .addUse(SrcReg)
2966 .addImm(AArch64::dsub);
2967
2968 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
2969 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
2970
2971 // Save the register so that we can copy from it after.
2972 InsertRegs.push_back(InsertReg);
2973 }
2974 }
2975
2976 // Now that we've created any necessary subregister inserts, we can
2977 // create the copies.
2978 //
2979 // Perform the first copy separately as a subregister copy.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002980 Register CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00002981 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
2982 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00002983 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002984
2985 // Now, perform the remaining copies as vector lane copies.
2986 unsigned LaneIdx = 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002987 for (Register InsReg : InsertRegs) {
2988 Register CopyTo = I.getOperand(LaneIdx).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002989 MachineInstr &CopyInst =
2990 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
2991 .addUse(InsReg)
2992 .addImm(LaneIdx);
2993 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
2994 ++LaneIdx;
2995 }
2996
2997 // Separately constrain the first copy's destination. Because of the
2998 // limitation in constrainOperandRegClass, we can't guarantee that this will
2999 // actually be constrained. So, do it ourselves using the second operand.
3000 const TargetRegisterClass *RC =
3001 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3002 if (!RC) {
3003 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3004 return false;
3005 }
3006
3007 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3008 I.eraseFromParent();
3009 return true;
3010}
3011
Amara Emerson2ff22982019-03-14 22:48:15 +00003012bool AArch64InstructionSelector::selectConcatVectors(
3013 MachineInstr &I, MachineRegisterInfo &MRI) const {
3014 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3015 "Unexpected opcode");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003016 Register Dst = I.getOperand(0).getReg();
3017 Register Op1 = I.getOperand(1).getReg();
3018 Register Op2 = I.getOperand(2).getReg();
Amara Emerson2ff22982019-03-14 22:48:15 +00003019 MachineIRBuilder MIRBuilder(I);
3020 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3021 if (!ConcatMI)
3022 return false;
3023 I.eraseFromParent();
3024 return true;
3025}
3026
Amara Emerson1abe05c2019-02-21 20:20:16 +00003027void AArch64InstructionSelector::collectShuffleMaskIndices(
3028 MachineInstr &I, MachineRegisterInfo &MRI,
Amara Emerson2806fd02019-04-12 21:31:21 +00003029 SmallVectorImpl<Optional<int>> &Idxs) const {
Amara Emerson1abe05c2019-02-21 20:20:16 +00003030 MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
3031 assert(
3032 MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
3033 "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
3034 // Find the constant indices.
3035 for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
Amara Emerson1abe05c2019-02-21 20:20:16 +00003036 // Look through copies.
Jessica Paquette31329682019-07-10 18:44:57 +00003037 MachineInstr *ScalarDef =
3038 getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
3039 assert(ScalarDef && "Could not find vreg def of shufflevec index op");
Amara Emerson2806fd02019-04-12 21:31:21 +00003040 if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
3041      // This must be an undef if it's not a constant.
3042 assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
3043 Idxs.push_back(None);
3044 } else {
3045 Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
3046 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00003047 }
3048}
3049
3050unsigned
3051AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3052 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00003053 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003054 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3055 if (Align == 0)
3056 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3057
3058 MachineConstantPool *MCP = MF.getConstantPool();
3059 return MCP->getConstantPoolIndex(CPVal, Align);
3060}
3061
3062MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3063 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3064 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3065
3066 auto Adrp =
3067 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3068 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003069
3070 MachineInstr *LoadMI = nullptr;
3071 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3072 case 16:
3073 LoadMI =
3074 &*MIRBuilder
3075 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3076 .addConstantPoolIndex(CPIdx, 0,
3077 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3078 break;
3079 case 8:
3080 LoadMI = &*MIRBuilder
3081 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3082 .addConstantPoolIndex(
3083 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3084 break;
3085 default:
3086 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3087 << *CPVal->getType());
3088 return nullptr;
3089 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00003090 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003091 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3092 return LoadMI;
3093}
3094
3095/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3096/// size and RB.
3097static std::pair<unsigned, unsigned>
3098getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3099 unsigned Opc, SubregIdx;
3100 if (RB.getID() == AArch64::GPRRegBankID) {
3101 if (EltSize == 32) {
3102 Opc = AArch64::INSvi32gpr;
3103 SubregIdx = AArch64::ssub;
3104 } else if (EltSize == 64) {
3105 Opc = AArch64::INSvi64gpr;
3106 SubregIdx = AArch64::dsub;
3107 } else {
3108 llvm_unreachable("invalid elt size!");
3109 }
3110 } else {
3111 if (EltSize == 8) {
3112 Opc = AArch64::INSvi8lane;
3113 SubregIdx = AArch64::bsub;
3114 } else if (EltSize == 16) {
3115 Opc = AArch64::INSvi16lane;
3116 SubregIdx = AArch64::hsub;
3117 } else if (EltSize == 32) {
3118 Opc = AArch64::INSvi32lane;
3119 SubregIdx = AArch64::ssub;
3120 } else if (EltSize == 64) {
3121 Opc = AArch64::INSvi64lane;
3122 SubregIdx = AArch64::dsub;
3123 } else {
3124 llvm_unreachable("invalid elt size!");
3125 }
3126 }
3127 return std::make_pair(Opc, SubregIdx);
3128}
3129
Jessica Paquette99316042019-07-02 19:44:16 +00003130MachineInstr *
Jessica Paquette728b18f2019-07-24 23:11:01 +00003131AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3132 MachineOperand &RHS,
3133 MachineIRBuilder &MIRBuilder) const {
3134 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3135 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3136 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3137 {AArch64::ADDWrr, AArch64::ADDWri}};
3138 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3139 auto ImmFns = selectArithImmed(RHS);
3140 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3141 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3142
3143 // If we matched a valid constant immediate, add those operands.
3144 if (ImmFns) {
3145 for (auto &RenderFn : *ImmFns)
3146 RenderFn(AddMI);
3147 } else {
3148 AddMI.addUse(RHS.getReg());
3149 }
3150
3151 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3152 return &*AddMI;
3153}
3154
3155MachineInstr *
Jessica Paquette99316042019-07-02 19:44:16 +00003156AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3157 MachineIRBuilder &MIRBuilder) const {
3158 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3159 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3160 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3161 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3162 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3163 auto ImmFns = selectArithImmed(RHS);
3164 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3165 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3166
3167 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3168
3169 // If we matched a valid constant immediate, add those operands.
3170 if (ImmFns) {
3171 for (auto &RenderFn : *ImmFns)
3172 RenderFn(CmpMI);
3173 } else {
3174 CmpMI.addUse(RHS.getReg());
3175 }
3176
3177 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3178 return &*CmpMI;
3179}
3180
Jessica Paquette55d19242019-07-08 22:58:36 +00003181MachineInstr *
3182AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3183 MachineIRBuilder &MIRBuilder) const {
3184 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3185 unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3186 bool Is32Bit = (RegSize == 32);
3187 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3188 {AArch64::ANDSWrr, AArch64::ANDSWri}};
3189 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3190
3191  // We might be able to fold an immediate into the TST. We need to make sure
3192 // it's a logical immediate though, since ANDS requires that.
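  // E.g. a mask like 0xff is a valid logical immediate and can be encoded
  // directly ("ands wzr, w0, #0xff", i.e. "tst w0, #0xff"), while an
  // arbitrary value such as 0x1234567 is not and must stay in a register.
  // (Sketch values, not taken from a particular test.)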
3193 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3194 bool IsImmForm = ValAndVReg.hasValue() &&
3195 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3196 unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3197 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3198
3199 if (IsImmForm)
3200 TstMI.addImm(
3201 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3202 else
3203 TstMI.addUse(RHS);
3204
3205 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3206 return &*TstMI;
3207}
3208
Jessica Paquette99316042019-07-02 19:44:16 +00003209MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3210 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3211 MachineIRBuilder &MIRBuilder) const {
3212 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3213 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3214
Jessica Paquette55d19242019-07-08 22:58:36 +00003215 // Fold the compare if possible.
3216 MachineInstr *FoldCmp =
3217 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3218 if (FoldCmp)
3219 return FoldCmp;
Jessica Paquette99316042019-07-02 19:44:16 +00003220
3221  // Couldn't fold the compare (into a CMN or TST). Just emit a normal compare.
3222 unsigned CmpOpc = 0;
3223 Register ZReg;
3224
3225 LLT CmpTy = MRI.getType(LHS.getReg());
Jessica Paquette65841092019-07-03 18:30:01 +00003226 assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3227 "Expected scalar or pointer");
Jessica Paquette99316042019-07-02 19:44:16 +00003228 if (CmpTy == LLT::scalar(32)) {
3229 CmpOpc = AArch64::SUBSWrr;
3230 ZReg = AArch64::WZR;
3231 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3232 CmpOpc = AArch64::SUBSXrr;
3233 ZReg = AArch64::XZR;
3234 } else {
3235 return nullptr;
3236 }
3237
3238 // Try to match immediate forms.
3239 auto ImmFns = selectArithImmed(RHS);
3240 if (ImmFns)
3241 CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3242
3243 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3244 // If we matched a valid constant immediate, add those operands.
3245 if (ImmFns) {
3246 for (auto &RenderFn : *ImmFns)
3247 RenderFn(CmpMI);
3248 } else {
3249 CmpMI.addUse(RHS.getReg());
3250 }
3251
3252 // Make sure that we can constrain the compare that we emitted.
3253 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3254 return &*CmpMI;
3255}
3256
Amara Emerson8acb0d92019-03-04 19:16:00 +00003257MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003258 Optional<Register> Dst, Register Op1, Register Op2,
Amara Emerson2ff22982019-03-14 22:48:15 +00003259 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00003260 // We implement a vector concat by:
3261 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3262 // 2. Insert the upper vector into the destination's upper element
3263 // TODO: some of this code is common with G_BUILD_VECTOR handling.
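  // E.g. for two <2 x s32> operands this becomes an INSERT_SUBREG of the
  // first operand into dsub of an undef Q register, followed by a lane
  // insert along the lines of "mov v0.d[1], v1.d[0]" (INSvi64lane) for the
  // second; register names are a sketch.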
3264 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3265
3266 const LLT Op1Ty = MRI.getType(Op1);
3267 const LLT Op2Ty = MRI.getType(Op2);
3268
3269 if (Op1Ty != Op2Ty) {
3270 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3271 return nullptr;
3272 }
3273 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3274
3275 if (Op1Ty.getSizeInBits() >= 128) {
3276 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3277 return nullptr;
3278 }
3279
3280 // At the moment we just support 64 bit vector concats.
3281 if (Op1Ty.getSizeInBits() != 64) {
3282 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
3283 return nullptr;
3284 }
3285
3286 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3287 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3288 const TargetRegisterClass *DstRC =
3289 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3290
3291 MachineInstr *WidenedOp1 =
3292 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3293 MachineInstr *WidenedOp2 =
3294 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3295 if (!WidenedOp1 || !WidenedOp2) {
3296 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3297 return nullptr;
3298 }
3299
3300 // Now do the insert of the upper element.
3301 unsigned InsertOpc, InsSubRegIdx;
3302 std::tie(InsertOpc, InsSubRegIdx) =
3303 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3304
Amara Emerson2ff22982019-03-14 22:48:15 +00003305 if (!Dst)
3306 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003307 auto InsElt =
3308 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00003309 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00003310 .addImm(1) /* Lane index */
3311 .addUse(WidenedOp2->getOperand(0).getReg())
3312 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003313 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3314 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003315}
3316
Jessica Paquettea3843fe2019-05-01 22:39:43 +00003317MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3318 MachineInstr &I, MachineRegisterInfo &MRI) const {
3319 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3320 "Expected a G_FCONSTANT!");
3321 MachineOperand &ImmOp = I.getOperand(1);
3322 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3323
3324 // Only handle 32 and 64 bit defs for now.
3325 if (DefSize != 32 && DefSize != 64)
3326 return nullptr;
3327
3328 // Don't handle null values using FMOV.
3329 if (ImmOp.getFPImm()->isNullValue())
3330 return nullptr;
3331
3332 // Get the immediate representation for the FMOV.
3333 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3334 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3335 : AArch64_AM::getFP64Imm(ImmValAPF);
3336
3337 // If this is -1, it means the immediate can't be represented as the requested
3338 // floating point value. Bail.
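  // E.g. 1.0 or 0.5 fit the 8-bit FMOV immediate encoding, while a value
  // like 0.1 (not exactly representable) comes back as -1 and is rejected
  // here. (Sketch examples.)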
3339 if (Imm == -1)
3340 return nullptr;
3341
3342 // Update MI to represent the new FMOV instruction, constrain it, and return.
3343 ImmOp.ChangeToImmediate(Imm);
3344 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3345 I.setDesc(TII.get(MovOpc));
3346 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3347 return &I;
3348}
3349
Jessica Paquette49537bb2019-06-17 18:40:06 +00003350MachineInstr *
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003351AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
Jessica Paquette49537bb2019-06-17 18:40:06 +00003352 MachineIRBuilder &MIRBuilder) const {
3353 // CSINC increments the result when the predicate is false. Invert it.
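  // E.g. for an equality compare this emits "csinc w0, wzr, wzr, ne", which
  // is the same encoding as the "cset w0, eq" alias; the register is a
  // sketch.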
3354 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3355 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3356 auto I =
3357 MIRBuilder
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003358 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette49537bb2019-06-17 18:40:06 +00003359 .addImm(InvCC);
3360 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3361 return &*I;
3362}
3363
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003364bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3365 MachineIRBuilder MIB(I);
3366 MachineRegisterInfo &MRI = *MIB.getMRI();
3367 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3368
3369 // We want to recognize this pattern:
3370 //
3371 // $z = G_FCMP pred, $x, $y
3372 // ...
3373 // $w = G_SELECT $z, $a, $b
3374 //
3375 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3376  // some copies/truncs in between).
3377 //
3378 // If we see this, then we can emit something like this:
3379 //
3380 // fcmp $x, $y
3381 // fcsel $w, $a, $b, pred
3382 //
3383 // Rather than emitting both of the rather long sequences in the standard
3384 // G_FCMP/G_SELECT select methods.
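  // (Without the fold, the G_FCMP would first materialize a boolean and the
  // G_SELECT would then have to test that boolean again before its csel,
  // which is roughly twice the work for this common pattern.)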
3385
3386 // First, check if the condition is defined by a compare.
3387 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3388 while (CondDef) {
3389 // We can only fold if all of the defs have one use.
3390 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3391 return false;
3392
3393 // We can skip over G_TRUNC since the condition is 1-bit.
3394 // Truncating/extending can have no impact on the value.
3395 unsigned Opc = CondDef->getOpcode();
3396 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3397 break;
3398
Amara Emersond940e202019-06-06 07:33:47 +00003399 // Can't see past copies from physregs.
3400 if (Opc == TargetOpcode::COPY &&
Daniel Sanders2bea69b2019-08-01 23:27:28 +00003401 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
Amara Emersond940e202019-06-06 07:33:47 +00003402 return false;
3403
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003404 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3405 }
3406
3407 // Is the condition defined by a compare?
Jessica Paquette99316042019-07-02 19:44:16 +00003408 if (!CondDef)
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003409 return false;
3410
Jessica Paquette99316042019-07-02 19:44:16 +00003411 unsigned CondOpc = CondDef->getOpcode();
3412 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3413 return false;
3414
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003415 AArch64CC::CondCode CondCode;
Jessica Paquette99316042019-07-02 19:44:16 +00003416 if (CondOpc == TargetOpcode::G_ICMP) {
3417 CondCode = changeICMPPredToAArch64CC(
3418 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3419 if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3420 CondDef->getOperand(1), MIB)) {
3421 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3422 return false;
3423 }
3424 } else {
3425 // Get the condition code for the select.
3426 AArch64CC::CondCode CondCode2;
3427 changeFCMPPredToAArch64CC(
3428 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3429 CondCode2);
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003430
Jessica Paquette99316042019-07-02 19:44:16 +00003431 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3432 // instructions to emit the comparison.
3433 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3434 // unnecessary.
3435 if (CondCode2 != AArch64CC::AL)
3436 return false;
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003437
Jessica Paquette99316042019-07-02 19:44:16 +00003438 // Make sure we'll be able to select the compare.
3439 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3440 if (!CmpOpc)
3441 return false;
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003442
Jessica Paquette99316042019-07-02 19:44:16 +00003443 // Emit a new compare.
3444 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3445 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3446 Cmp.addUse(CondDef->getOperand(3).getReg());
3447 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3448 }
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003449
3450 // Emit the select.
3451 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3452 auto CSel =
3453 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3454 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3455 .addImm(CondCode);
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003456 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3457 I.eraseFromParent();
3458 return true;
3459}
3460
Jessica Paquette55d19242019-07-08 22:58:36 +00003461MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3462 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3463 MachineIRBuilder &MIRBuilder) const {
Jessica Paquette99316042019-07-02 19:44:16 +00003464 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3465 "Unexpected MachineOperand");
Jessica Paquette49537bb2019-06-17 18:40:06 +00003466 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3467 // We want to find this sort of thing:
3468 // x = G_SUB 0, y
3469 // G_ICMP z, x
3470 //
3471 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3472 // e.g:
3473 //
3474 // cmn z, y
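  // This is sound because "cmn z, y" is "adds zr, z, y": z + y is zero
  // exactly when z == 0 - y, so the Z flag matches the original compare.
  // Only EQ/NE are folded (checked below) since the carry/overflow flags
  // can differ between the two forms. (A sketch of the reasoning.)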
3475
Jessica Paquette49537bb2019-06-17 18:40:06 +00003476 // Helper lambda to detect the subtract followed by the compare.
3477 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3478 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3479 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3480 return false;
3481
3482 // Need to make sure NZCV is the same at the end of the transformation.
3483 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3484 return false;
3485
3490 // Make sure that we're getting
3491 // x = G_SUB 0, y
3492 auto ValAndVReg =
3493 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3494 if (!ValAndVReg || ValAndVReg->Value != 0)
3495 return false;
3496
3497 // This can safely be represented as a CMN.
3498 return true;
3499 };
3500
3501 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
Jessica Paquette31329682019-07-10 18:44:57 +00003502 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3503 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
Jessica Paquette55d19242019-07-08 22:58:36 +00003504 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3505 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
Jessica Paquette99316042019-07-02 19:44:16 +00003506
Jessica Paquette55d19242019-07-08 22:58:36 +00003507 // Given this:
3508 //
3509 // x = G_SUB 0, y
3510 // G_ICMP x, z
3511 //
3512 // Produce this:
3513 //
3514 // cmn y, z
3515 if (IsCMN(LHSDef, CC))
3516 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3517
3518 // Same idea here, but with the RHS of the compare instead:
3519 //
3520 // Given this:
3521 //
3522 // x = G_SUB 0, y
3523 // G_ICMP z, x
3524 //
3525 // Produce this:
3526 //
3527 // cmn z, y
3528 if (IsCMN(RHSDef, CC))
3529 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3530
3531 // Given this:
3532 //
3533 // z = G_AND x, y
3534 // G_ICMP z, 0
3535 //
3536 // Produce this if the compare is signed:
3537 //
3538 // tst x, y
3539 if (!isUnsignedICMPPred(P) && LHSDef &&
3540 LHSDef->getOpcode() == TargetOpcode::G_AND) {
3541 // Make sure that the RHS is 0.
3542 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3543 if (!ValAndVReg || ValAndVReg->Value != 0)
3544 return nullptr;
3545
3546 return emitTST(LHSDef->getOperand(1).getReg(),
3547 LHSDef->getOperand(2).getReg(), MIRBuilder);
Jessica Paquette49537bb2019-06-17 18:40:06 +00003548 }
3549
Jessica Paquette99316042019-07-02 19:44:16 +00003550 return nullptr;
Jessica Paquette49537bb2019-06-17 18:40:06 +00003551}
3552
Amara Emerson761ca2e2019-03-19 21:43:05 +00003553bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3554 // Try to match a vector splat operation into a dup instruction.
3555 // We're looking for this pattern:
3556 // %scalar:gpr(s64) = COPY $x0
3557 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3558 // %cst0:gpr(s32) = G_CONSTANT i32 0
3559 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3560 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3561 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3562 // %zerovec(<2 x s32>)
3563 //
3564 // ...into:
3565 // %splat = DUP %scalar
3566 // We use the regbank of the scalar to determine which kind of dup to use.
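  // E.g. splatting a GPR s64 into <2 x s64> becomes "dup v0.2d, x0"
  // (DUPv2i64gpr), while an FPR scalar uses the lane form
  // "dup v0.2d, v1.d[0]" (DUPv2i64lane); register names are a sketch.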
3567 MachineIRBuilder MIB(I);
3568 MachineRegisterInfo &MRI = *MIB.getMRI();
3569 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3570 using namespace TargetOpcode;
3571 using namespace MIPatternMatch;
3572
3573 // Begin matching the insert.
3574 auto *InsMI =
Jessica Paquette7c959252019-07-10 18:46:56 +00003575 getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
Amara Emerson761ca2e2019-03-19 21:43:05 +00003576 if (!InsMI)
3577 return false;
3578 // Match the undef vector operand.
3579 auto *UndefMI =
Jessica Paquette7c959252019-07-10 18:46:56 +00003580 getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
Amara Emerson761ca2e2019-03-19 21:43:05 +00003581 if (!UndefMI)
3582 return false;
3583 // Match the scalar being splatted.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003584 Register ScalarReg = InsMI->getOperand(2).getReg();
Amara Emerson761ca2e2019-03-19 21:43:05 +00003585 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3586 // Match the index constant 0.
3587 int64_t Index = 0;
3588 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3589 return false;
3590
3591 // The shuffle's second operand doesn't matter if the mask is all zero.
Jessica Paquette7c959252019-07-10 18:46:56 +00003592 auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
Amara Emerson761ca2e2019-03-19 21:43:05 +00003593 if (!ZeroVec)
3594 return false;
3595 int64_t Zero = 0;
3596 if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
3597 return false;
Jessica Paquettec19c3072019-07-24 17:18:51 +00003598 for (unsigned i = 1, e = ZeroVec->getNumOperands(); i < e; ++i) {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003599 if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
3600 return false; // This wasn't an all zeros vector.
3601 }
3602
3603 // We're done, now find out what kind of splat we need.
3604 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3605 LLT EltTy = VecTy.getElementType();
3606 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3607 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3608 return false;
3609 }
3610 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3611 static const unsigned OpcTable[2][2] = {
3612 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3613 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3614 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3615
3616 // For FP splats, we need to widen the scalar reg via undef too.
3617 if (IsFP) {
3618 MachineInstr *Widen = emitScalarToVector(
3619 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3620 if (!Widen)
3621 return false;
3622 ScalarReg = Widen->getOperand(0).getReg();
3623 }
3624 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3625 if (IsFP)
3626 Dup.addImm(0);
3627 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3628 I.eraseFromParent();
3629 return true;
3630}
3631
3632bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3633 if (TM.getOptLevel() == CodeGenOpt::None)
3634 return false;
3635 if (tryOptVectorDup(I))
3636 return true;
3637 return false;
3638}
3639
Amara Emerson1abe05c2019-02-21 20:20:16 +00003640bool AArch64InstructionSelector::selectShuffleVector(
3641 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003642 if (tryOptVectorShuffle(I))
3643 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003644 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003645 Register Src1Reg = I.getOperand(1).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003646 const LLT Src1Ty = MRI.getType(Src1Reg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003647 Register Src2Reg = I.getOperand(2).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003648 const LLT Src2Ty = MRI.getType(Src2Reg);
3649
3650 MachineBasicBlock &MBB = *I.getParent();
3651 MachineFunction &MF = *MBB.getParent();
3652 LLVMContext &Ctx = MF.getFunction().getContext();
3653
3654 // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
3655  // operand; it comes in as a normal vector value which we have to analyze to
Amara Emerson2806fd02019-04-12 21:31:21 +00003656 // find the mask indices. If the mask element is undef, then
3657 // collectShuffleMaskIndices() will add a None entry for that index into
3658 // the list.
3659 SmallVector<Optional<int>, 8> Mask;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003660 collectShuffleMaskIndices(I, MRI, Mask);
3661 assert(!Mask.empty() && "Expected to find mask indices");
3662
3663 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3664  // it originated from a <1 x T> type. Those should have been lowered into
3665 // G_BUILD_VECTOR earlier.
3666 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3667 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3668 return false;
3669 }
3670
3671 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3672
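  // Worked example (a sketch): for a <4 x s32> shuffle, mask element 5
  // expands to byte indices 20, 21, 22 and 23, i.e. the four bytes of
  // element 1 of the second source once both sources are concatenated for
  // TBL.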
3673 SmallVector<Constant *, 64> CstIdxs;
Amara Emerson2806fd02019-04-12 21:31:21 +00003674 for (auto &MaybeVal : Mask) {
3675 // For now, any undef indexes we'll just assume to be 0. This should be
3676    // optimized in the future, e.g. to select DUP etc.
3677 int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003678 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3679 unsigned Offset = Byte + Val * BytesPerElt;
3680 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3681 }
3682 }
3683
Amara Emerson8acb0d92019-03-04 19:16:00 +00003684 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003685
3686 // Use a constant pool to load the index vector for TBL.
3687 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003688 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3689 if (!IndexLoad) {
3690 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
3691 return false;
3692 }
3693
Amara Emerson8acb0d92019-03-04 19:16:00 +00003694 if (DstTy.getSizeInBits() != 128) {
3695 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3696 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00003697 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003698 if (!Concat) {
3699 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
3700 return false;
3701 }
3702
3703    // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
3704 IndexLoad =
3705 emitScalarToVector(64, &AArch64::FPR128RegClass,
3706 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3707
3708 auto TBL1 = MIRBuilder.buildInstr(
3709 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3710 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3711 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3712
Amara Emerson3739a202019-03-15 21:59:50 +00003713 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00003714 MIRBuilder
3715 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3716 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003717 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3718 I.eraseFromParent();
3719 return true;
3720 }
3721
Amara Emerson1abe05c2019-02-21 20:20:16 +00003722 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3723 // Q registers for regalloc.
3724 auto RegSeq = MIRBuilder
3725 .buildInstr(TargetOpcode::REG_SEQUENCE,
3726 {&AArch64::QQRegClass}, {Src1Reg})
3727 .addImm(AArch64::qsub0)
3728 .addUse(Src2Reg)
3729 .addImm(AArch64::qsub1);
3730
3731 auto TBL2 =
3732 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3733 {RegSeq, IndexLoad->getOperand(0).getReg()});
3734 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3735 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3736 I.eraseFromParent();
3737 return true;
3738}
3739
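/// Emit an instruction that inserts \p EltReg into lane \p LaneIdx of the
/// 128-bit vector register \p SrcReg. If \p DstReg is not provided, a new
/// virtual register is created to hold the result.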
Jessica Paquette16d67a32019-03-13 23:22:23 +00003740MachineInstr *AArch64InstructionSelector::emitLaneInsert(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003741 Optional<Register> DstReg, Register SrcReg, Register EltReg,
Jessica Paquette16d67a32019-03-13 23:22:23 +00003742 unsigned LaneIdx, const RegisterBank &RB,
3743 MachineIRBuilder &MIRBuilder) const {
3744 MachineInstr *InsElt = nullptr;
3745 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3746 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3747
3748 // Create a register to define with the insert if one wasn't passed in.
3749 if (!DstReg)
3750 DstReg = MRI.createVirtualRegister(DstRC);
3751
3752 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3753 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3754
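  // An FPR-bank element must first be placed into a vector register so that an
  // element-to-element insert can be used; a GPR-bank element can be inserted
  // directly from the general-purpose register.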
3755 if (RB.getID() == AArch64::FPRRegBankID) {
3756 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3757 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3758 .addImm(LaneIdx)
3759 .addUse(InsSub->getOperand(0).getReg())
3760 .addImm(0);
3761 } else {
3762 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3763 .addImm(LaneIdx)
3764 .addUse(EltReg);
3765 }
3766
3767 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3768 return InsElt;
3769}
3770
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003771bool AArch64InstructionSelector::selectInsertElt(
3772 MachineInstr &I, MachineRegisterInfo &MRI) const {
3773 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3774
3775 // Get information on the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003776 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003777 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003778 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003779
3780 // Get information on the element we want to insert into the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003781 Register EltReg = I.getOperand(2).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003782 const LLT EltTy = MRI.getType(EltReg);
3783 unsigned EltSize = EltTy.getSizeInBits();
3784 if (EltSize < 16 || EltSize > 64)
3785 return false; // Don't support all element types yet.
3786
3787 // Find the definition of the index. Bail out if it's not defined by a
3788 // G_CONSTANT.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003789 Register IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00003790 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3791 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003792 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00003793 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003794
3795 // Perform the lane insert.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003796 Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003797 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3798 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003799
3800 if (VecSize < 128) {
3801 // If the vector we're inserting into is smaller than 128 bits, widen it
3802 // to 128 to do the insert.
3803 MachineInstr *ScalarToVec = emitScalarToVector(
3804 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3805 if (!ScalarToVec)
3806 return false;
3807 SrcReg = ScalarToVec->getOperand(0).getReg();
3808 }
3809
3810 // Create an insert into a new FPR128 register.
3811 // Note that if our vector is already 128 bits, we end up emitting an extra
3812 // register.
3813 MachineInstr *InsMI =
3814 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3815
3816 if (VecSize < 128) {
3817 // If we had to widen to perform the insert, then we have to demote back to
3818 // the original size to get the result we want.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003819 Register DemoteVec = InsMI->getOperand(0).getReg();
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003820 const TargetRegisterClass *RC =
3821 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3822 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3823 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3824 return false;
3825 }
3826 unsigned SubReg = 0;
3827 if (!getSubRegForClass(RC, TRI, SubReg))
3828 return false;
3829 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3830 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3831 << ")\n");
3832 return false;
3833 }
3834 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3835 .addReg(DemoteVec, 0, SubReg);
3836 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3837 } else {
3838 // No widening needed.
3839 InsMI->getOperand(0).setReg(DstReg);
3840 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3841 }
3842
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003843 I.eraseFromParent();
3844 return true;
3845}
3846
Amara Emerson5ec14602018-12-10 18:44:58 +00003847bool AArch64InstructionSelector::selectBuildVector(
3848 MachineInstr &I, MachineRegisterInfo &MRI) const {
3849 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3850 // Until we port more of the optimized selections, for now just use a vector
3851 // insert sequence.
3852 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3853 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3854 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003855 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003856 return false; // Don't support all element types yet.
3857 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003858 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003859
3860 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003861 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003862 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3863 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003864 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003865 return false;
3866
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003867 Register DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003868 unsigned DstSize = DstTy.getSizeInBits();
3869
3870 // Keep track of the last MI we inserted. Later on, we might be able to save
3871 // a copy using it.
3872 MachineInstr *PrevMI = nullptr;
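  // Operand 1 was already placed into DstVec by the scalar-to-vector insert
  // above, so start at operand 2 and insert each element into lane i - 1.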
3873 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003874 // Note that if we don't do a subregister copy, we can end up making an
3875 // extra register.
3876 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3877 MIRBuilder);
3878 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003879 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003880
3881 // If DstTy's size in bits is less than 128, then emit a subregister copy
3882 // from DstVec to the last register we've defined.
3883 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003884 // Force this to be FPR using the destination vector.
3885 const TargetRegisterClass *RC =
3886 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003887 if (!RC)
3888 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003889 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3890 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3891 return false;
3892 }
3893
3894 unsigned SubReg = 0;
3895 if (!getSubRegForClass(RC, TRI, SubReg))
3896 return false;
3897 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3898 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3899 << ")\n");
3900 return false;
3901 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003902
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003903 Register Reg = MRI.createVirtualRegister(RC);
3904 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003905
Amara Emerson86271782019-03-18 19:20:10 +00003906 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3907 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003908 MachineOperand &RegOp = I.getOperand(1);
3909 RegOp.setReg(Reg);
3910 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3911 } else {
3912 // We don't need a subregister copy. Save a copy by re-using the
3913 // destination register on the final insert.
3914 assert(PrevMI && "PrevMI was null?");
3915 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3916 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3917 }
3918
Amara Emerson5ec14602018-12-10 18:44:58 +00003919 I.eraseFromParent();
3920 return true;
3921}
3922
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003923/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3924/// ID if it exists, and 0 otherwise.
3925static unsigned findIntrinsicID(MachineInstr &I) {
3926 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3927 return Op.isIntrinsicID();
3928 });
3929 if (IntrinOp == I.operands_end())
3930 return 0;
3931 return IntrinOp->getIntrinsicID();
3932}
3933
Jessica Paquette22c62152019-04-02 19:57:26 +00003934/// Helper function to find the correct opcode for a llvm.aarch64.stlxr
3935/// intrinsic.
3936static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3937 switch (NumBytesToStore) {
Jessica Paquetteaa8b9992019-07-26 23:28:53 +00003938 // TODO: 1 and 2 byte stores
3939 case 4:
3940 return AArch64::STLXRW;
Jessica Paquette22c62152019-04-02 19:57:26 +00003941 case 8:
3942 return AArch64::STLXRX;
3943 default:
3944 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3945 << NumBytesToStore << ")\n");
3946 break;
3947 }
3948 return 0;
3949}
3950
3951bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3952 MachineInstr &I, MachineRegisterInfo &MRI) const {
3953 // Find the intrinsic ID.
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003954 unsigned IntrinID = findIntrinsicID(I);
3955 if (!IntrinID)
Jessica Paquette22c62152019-04-02 19:57:26 +00003956 return false;
Jessica Paquette22c62152019-04-02 19:57:26 +00003957 MachineIRBuilder MIRBuilder(I);
3958
3959 // Select the instruction.
3960 switch (IntrinID) {
3961 default:
3962 return false;
3963 case Intrinsic::trap:
3964 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3965 break;
Tom Tan7ecb5142019-06-21 23:38:05 +00003966 case Intrinsic::debugtrap:
3967 if (!STI.isTargetWindows())
3968 return false;
3969 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3970 break;
Jessica Paquette22c62152019-04-02 19:57:26 +00003971 case Intrinsic::aarch64_stlxr:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003972 Register StatReg = I.getOperand(0).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003973 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
3974 "Status register must be 32 bits!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003975 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003976
3977 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
3978 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
3979 return false;
3980 }
3981
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003982 Register PtrReg = I.getOperand(3).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00003983 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
3984
3985 // Expect only one memory operand.
3986 if (!I.hasOneMemOperand())
3987 return false;
3988
3989 const MachineMemOperand *MemOp = *I.memoperands_begin();
3990 unsigned NumBytesToStore = MemOp->getSize();
3991 unsigned Opc = getStlxrOpcode(NumBytesToStore);
3992 if (!Opc)
3993 return false;
Jessica Paquetteaa8b9992019-07-26 23:28:53 +00003994 unsigned NumBitsToStore = NumBytesToStore * 8;
3995 if (NumBitsToStore != 64) {
3996 // The intrinsic always has a 64-bit source, but we might actually want
3997 // a differently-sized source for the instruction. Try to get it.
3998 // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
3999 // just handle 4-byte stores.
4000 // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
4001 // to the right size for the STLXR.
4002 MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
4003 if (!Zext)
4004 return false;
4005 SrcReg = Zext->getOperand(1).getReg();
4006 // We should get an appropriately-sized register here.
4007 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
4008 return false;
4009 }
4010 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
4011 .addMemOperand(*I.memoperands_begin());
Jessica Paquette22c62152019-04-02 19:57:26 +00004012 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
4013 }
4014
4015 I.eraseFromParent();
4016 return true;
4017}
4018
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00004019bool AArch64InstructionSelector::selectIntrinsic(
4020 MachineInstr &I, MachineRegisterInfo &MRI) const {
4021 unsigned IntrinID = findIntrinsicID(I);
4022 if (!IntrinID)
4023 return false;
4024 MachineIRBuilder MIRBuilder(I);
4025
4026 switch (IntrinID) {
4027 default:
4028 break;
4029 case Intrinsic::aarch64_crypto_sha1h:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00004030 Register DstReg = I.getOperand(0).getReg();
4031 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00004032
4033 // FIXME: Should this be an assert?
4034 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4035 MRI.getType(SrcReg).getSizeInBits() != 32)
4036 return false;
4037
4038 // The operation has to happen on FPRs. Set up some new FPR registers for
4039 // the source and destination if they are on GPRs.
4040 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4041 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4042 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4043
4044 // Make sure the copy ends up getting constrained properly.
4045 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4046 AArch64::GPR32RegClass, MRI);
4047 }
4048
4049 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4050 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4051
4052 // Actually insert the instruction.
4053 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4054 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4055
4056 // Did we create a new register for the destination?
4057 if (DstReg != I.getOperand(0).getReg()) {
4058 // Yep. Copy the result of the instruction back into the original
4059 // destination.
4060 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4061 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4062 AArch64::GPR32RegClass, MRI);
4063 }
4064
4065 I.eraseFromParent();
4066 return true;
4067 }
4068 return false;
4069}
4070
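/// Extract the immediate held by \p Root, looking through a G_CONSTANT
/// definition when \p Root is a register operand. Returns None if no constant
/// can be found.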
Amara Emersoncac11512019-07-03 01:49:06 +00004071static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4072 auto &MI = *Root.getParent();
4073 auto &MBB = *MI.getParent();
4074 auto &MF = *MBB.getParent();
4075 auto &MRI = MF.getRegInfo();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004076 uint64_t Immed;
4077 if (Root.isImm())
4078 Immed = Root.getImm();
4079 else if (Root.isCImm())
4080 Immed = Root.getCImm()->getZExtValue();
4081 else if (Root.isReg()) {
Jessica Paquettea99cfee2019-07-03 17:46:23 +00004082 auto ValAndVReg =
4083 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4084 if (!ValAndVReg)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004085 return None;
Jessica Paquettea99cfee2019-07-03 17:46:23 +00004086 Immed = ValAndVReg->Value;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004087 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004088 return None;
Amara Emersoncac11512019-07-03 01:49:06 +00004089 return Immed;
4090}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004091
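// The selectShiftA_*/selectShiftB_* renderers below compute the two immediates
// (immr, imms) needed to encode an immediate shift as a bitfield-move
// instruction. For example, a 32-bit left shift by 3 gives
// ShiftA = (32 - 3) & 0x1f = 29 and ShiftB = 31 - 3 = 28, i.e.
// 'UBFMWri Rd, Rn, 29, 28', which is the encoding of 'lsl Rd, Rn, #3'.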
Amara Emersoncac11512019-07-03 01:49:06 +00004092InstructionSelector::ComplexRendererFns
4093AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4094 auto MaybeImmed = getImmedFromMO(Root);
4095 if (MaybeImmed == None || *MaybeImmed > 31)
4096 return None;
4097 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4098 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4099}
4100
4101InstructionSelector::ComplexRendererFns
4102AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4103 auto MaybeImmed = getImmedFromMO(Root);
4104 if (MaybeImmed == None || *MaybeImmed > 31)
4105 return None;
4106 uint64_t Enc = 31 - *MaybeImmed;
4107 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4108}
4109
4110InstructionSelector::ComplexRendererFns
4111AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4112 auto MaybeImmed = getImmedFromMO(Root);
4113 if (MaybeImmed == None || *MaybeImmed > 63)
4114 return None;
4115 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4116 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4117}
4118
4119InstructionSelector::ComplexRendererFns
4120AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4121 auto MaybeImmed = getImmedFromMO(Root);
4122 if (MaybeImmed == None || *MaybeImmed > 63)
4123 return None;
4124 uint64_t Enc = 63 - *MaybeImmed;
4125 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4126}
4127
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004128/// Helper to select an immediate value that can be represented as a 12-bit
4129/// value shifted left by either 0 or 12. If it is possible to do so, return
4130/// the immediate and shift value. If not, return None.
4131///
4132/// Used by selectArithImmed and selectNegArithImmed.
Amara Emersoncac11512019-07-03 01:49:06 +00004133InstructionSelector::ComplexRendererFns
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004134AArch64InstructionSelector::select12BitValueWithLeftShift(
4135 uint64_t Immed) const {
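  // E.g. 0x123 is returned unshifted (LSL #0), 0x123000 is returned as 0x123
  // with LSL #12, and anything that fits neither form is rejected.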
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004136 unsigned ShiftAmt;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004137 if (Immed >> 12 == 0) {
4138 ShiftAmt = 0;
4139 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4140 ShiftAmt = 12;
4141 Immed = Immed >> 12;
4142 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004143 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004144
4145 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004146 return {{
4147 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4148 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4149 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004150}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00004151
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004152/// SelectArithImmed - Select an immediate value that can be represented as
4153/// a 12-bit value shifted left by either 0 or 12. If so, return the renderer
4154/// functions for the 12-bit value and the shifter operand; otherwise return None.
4155InstructionSelector::ComplexRendererFns
4156AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4157 // This function is called from the addsub_shifted_imm ComplexPattern,
4158 // which lists [imm] as the list of opcodes it's interested in; however,
4159 // we still need to check whether the operand is actually an immediate
4160 // here because the ComplexPattern opcode list is only used in
4161 // root-level opcode matching.
4162 auto MaybeImmed = getImmedFromMO(Root);
4163 if (MaybeImmed == None)
4164 return None;
4165 return select12BitValueWithLeftShift(*MaybeImmed);
4166}
4167
4168/// SelectNegArithImmed - As above, but negates the value before trying to
4169/// select it.
4170InstructionSelector::ComplexRendererFns
4171AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4172 // We need a register here, because we need to know if we have a 64 or 32
4173 // bit immediate.
4174 if (!Root.isReg())
4175 return None;
4176 auto MaybeImmed = getImmedFromMO(Root);
4177 if (MaybeImmed == None)
4178 return None;
4179 uint64_t Immed = *MaybeImmed;
4180
4181 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4182 // have the opposite effect on the C flag, so this pattern mustn't match under
4183 // those circumstances.
4184 if (Immed == 0)
4185 return None;
4186
4187 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4188 // the root.
4189 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4190 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4191 Immed = ~((uint32_t)Immed) + 1;
4192 else
4193 Immed = ~Immed + 1ULL;
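  // E.g. for a 32-bit 'cmp wN, #-5' the negated immediate 5 is selected here,
  // allowing the compare to be emitted as 'cmn wN, #5'.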
4194
4195 if (Immed & 0xFFFFFFFFFF000000ULL)
4196 return None;
4197
4198 Immed &= 0xFFFFFFULL;
4199 return select12BitValueWithLeftShift(Immed);
4200}
4201
Jessica Paquette2b404d02019-07-23 16:09:42 +00004202/// Return true if it is worth folding MI into an extended register. That is,
4203/// if it's profitable to pull it into the addressing mode of a load or store as a
4204/// shift.
4205bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4206 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4207 // Always fold if there is one use, or if we're optimizing for size.
4208 Register DefReg = MI.getOperand(0).getReg();
4209 if (MRI.hasOneUse(DefReg) ||
4210 MI.getParent()->getParent()->getFunction().hasMinSize())
4211 return true;
4212
4213 // It's better to avoid folding and recomputing shifts when we don't have a
4214 // fastpath.
4215 if (!STI.hasLSLFast())
4216 return false;
4217
4218 // We have a fastpath, so folding a shift in and potentially computing it
4219 // many times may be beneficial. Check if this is only used in memory ops.
4220 // If it is, then we should fold.
4221 return all_of(MRI.use_instructions(DefReg),
4222 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4223}
4224
4225/// This is used for computing addresses like this:
4226///
4227/// ldr x1, [x2, x3, lsl #3]
4228///
4229/// Where x2 is the base register, and x3 is an offset register. The shift-left
4230/// is a constant value specific to this load instruction. That is, we'll never
4231/// see anything other than a 3 here (which corresponds to the size of the
4232/// element being loaded).
4233InstructionSelector::ComplexRendererFns
4234AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4235 MachineOperand &Root, unsigned SizeInBytes) const {
4236 if (!Root.isReg())
4237 return None;
4238 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4239
4240 // Make sure that the memory op is a valid size.
4241 int64_t LegalShiftVal = Log2_32(SizeInBytes);
4242 if (LegalShiftVal == 0)
4243 return None;
4244
4245 // We want to find something like this:
4246 //
4247 // val = G_CONSTANT LegalShiftVal
4248 // shift = G_SHL off_reg val
4249 // ptr = G_GEP base_reg shift
4250 // x = G_LOAD ptr
4251 //
4252 // And fold it into this addressing mode:
4253 //
4254 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4255
4256 // Check if we can find the G_GEP.
4257 MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4258 if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4259 return None;
4260
Jessica Paquette68499112019-07-24 22:49:42 +00004261 // Now, try to match an opcode which will match our specific offset.
4262 // We want a G_SHL or a G_MUL.
4263 MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4264 if (!OffsetInst)
Jessica Paquette2b404d02019-07-23 16:09:42 +00004265 return None;
4266
Jessica Paquette68499112019-07-24 22:49:42 +00004267 unsigned OffsetOpc = OffsetInst->getOpcode();
4268 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
Jessica Paquette2b404d02019-07-23 16:09:42 +00004269 return None;
4270
Jessica Paquette68499112019-07-24 22:49:42 +00004271 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4272 return None;
4273
4274 // Now, try to find the specific G_CONSTANT. Start by assuming that the
4275 // register we will offset is the LHS, and the register containing the
4276 // constant is the RHS.
4277 Register OffsetReg = OffsetInst->getOperand(1).getReg();
4278 Register ConstantReg = OffsetInst->getOperand(2).getReg();
4279 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4280 if (!ValAndVReg) {
4281 // We didn't get a constant on the RHS. If the opcode is a shift, then the
4282 // constant must be on the RHS, so give up.
4283 if (OffsetOpc == TargetOpcode::G_SHL)
4284 return None;
4285
4286 // If we have a G_MUL, we can use either register. Try looking at the RHS.
4287 std::swap(OffsetReg, ConstantReg);
4288 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4289 if (!ValAndVReg)
4290 return None;
4291 }
4292
Jessica Paquette2b404d02019-07-23 16:09:42 +00004293 // The value must fit into 3 bits, and must be positive. Make sure that is
4294 // true.
4295 int64_t ImmVal = ValAndVReg->Value;
Jessica Paquette68499112019-07-24 22:49:42 +00004296
4297 // Since we're going to pull this into a shift, the constant value must be
4298 // a power of 2. If we got a multiply, then we need to check this.
4299 if (OffsetOpc == TargetOpcode::G_MUL) {
4300 if (!isPowerOf2_32(ImmVal))
4301 return None;
4302
4303 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4304 ImmVal = Log2_32(ImmVal);
4305 }
4306
Jessica Paquette2b404d02019-07-23 16:09:42 +00004307 if ((ImmVal & 0x7) != ImmVal)
4308 return None;
4309
4310 // We are only allowed to shift by LegalShiftVal. This shift value is built
4311 // into the instruction, so we can't just use whatever we want.
4312 if (ImmVal != LegalShiftVal)
4313 return None;
4314
4315 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4316 // offset. Signify that we are shifting by setting the shift flag to 1.
4317 return {{
4318 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
Jessica Paquette68499112019-07-24 22:49:42 +00004319 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
Jessica Paquette2b404d02019-07-23 16:09:42 +00004320 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4321 [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4322 }};
4323}
4324
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00004325/// This is used for computing addresses like this:
4326///
4327/// ldr x1, [x2, x3]
4328///
4329/// Where x2 is the base register, and x3 is an offset register.
4330///
4331/// When possible (or profitable) to fold a G_GEP into the address calculation,
4332/// this will do so. Otherwise, it will return None.
4333InstructionSelector::ComplexRendererFns
4334AArch64InstructionSelector::selectAddrModeRegisterOffset(
4335 MachineOperand &Root) const {
4336 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4337
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00004338 // We need a GEP.
4339 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4340 if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4341 return None;
4342
4343 // If this is used more than once, let's not bother folding.
4344 // TODO: Check if they are memory ops. If they are, then we can still fold
4345 // without having to recompute anything.
4346 if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4347 return None;
4348
4349 // Base is the GEP's LHS, offset is its RHS.
4350 return {{
4351 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4352 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4353 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4354 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4355 }};
4356}
4357
Jessica Paquette2b404d02019-07-23 16:09:42 +00004358/// This is intended to be equivalent to selectAddrModeXRO in
4359/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
4360InstructionSelector::ComplexRendererFns
4361AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4362 unsigned SizeInBytes) const {
4363 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4364
4365 // If we have a constant offset, then we probably don't want to match a
4366 // register offset.
4367 if (isBaseWithConstantOffset(Root, MRI))
4368 return None;
4369
4370 // Try to fold shifts into the addressing mode.
4371 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4372 if (AddrModeFns)
4373 return AddrModeFns;
4374
4375 // If that doesn't work, see if it's possible to fold in registers from
4376 // a GEP.
4377 return selectAddrModeRegisterOffset(Root);
4378}
4379
Daniel Sandersea8711b2017-10-16 03:36:29 +00004380/// Select a "register plus unscaled signed 9-bit immediate" address. This
4381/// should only match when there is an offset that is not valid for a scaled
4382/// immediate addressing mode. The "Size" argument is the size in bytes of the
4383/// memory reference, which is needed here to know what is valid for a scaled
4384/// immediate.
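/// For example, a 32-bit load with an offset of -17 cannot use a scaled
/// immediate, but does fit the signed 9-bit unscaled range, so it is matched
/// here.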
Daniel Sanders1e4569f2017-10-20 20:55:29 +00004385InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00004386AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4387 unsigned Size) const {
4388 MachineRegisterInfo &MRI =
4389 Root.getParent()->getParent()->getParent()->getRegInfo();
4390
4391 if (!Root.isReg())
4392 return None;
4393
4394 if (!isBaseWithConstantOffset(Root, MRI))
4395 return None;
4396
4397 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4398 if (!RootDef)
4399 return None;
4400
4401 MachineOperand &OffImm = RootDef->getOperand(2);
4402 if (!OffImm.isReg())
4403 return None;
4404 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4405 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4406 return None;
4407 int64_t RHSC;
4408 MachineOperand &RHSOp1 = RHS->getOperand(1);
4409 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4410 return None;
4411 RHSC = RHSOp1.getCImm()->getSExtValue();
4412
4413 // If the offset is valid as a scaled immediate, don't match here.
4414 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4415 return None;
4416 if (RHSC >= -256 && RHSC < 256) {
4417 MachineOperand &Base = RootDef->getOperand(1);
4418 return {{
4419 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4420 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4421 }};
4422 }
4423 return None;
4424}
4425
4426/// Select a "register plus scaled unsigned 12-bit immediate" address. The
4427/// "Size" argument is the size in bytes of the memory reference, which
4428/// determines the scale.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00004429InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00004430AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4431 unsigned Size) const {
4432 MachineRegisterInfo &MRI =
4433 Root.getParent()->getParent()->getParent()->getRegInfo();
4434
4435 if (!Root.isReg())
4436 return None;
4437
4438 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4439 if (!RootDef)
4440 return None;
4441
4442 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4443 return {{
4444 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4445 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4446 }};
4447 }
4448
4449 if (isBaseWithConstantOffset(Root, MRI)) {
4450 MachineOperand &LHS = RootDef->getOperand(1);
4451 MachineOperand &RHS = RootDef->getOperand(2);
4452 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4453 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4454 if (LHSDef && RHSDef) {
4455 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4456 unsigned Scale = Log2_32(Size);
4457 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4458 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00004459 return {{
4460 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4461 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4462 }};
4463
Daniel Sandersea8711b2017-10-16 03:36:29 +00004464 return {{
4465 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4466 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4467 }};
4468 }
4469 }
4470 }
4471
4472 // Before falling back to our general case, check if the unscaled
4473 // instructions can handle this. If so, that's preferable.
4474 if (selectAddrModeUnscaled(Root, Size).hasValue())
4475 return None;
4476
4477 return {{
4478 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4479 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4480 }};
4481}
4482
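/// Custom renderer that adds the value of the G_CONSTANT \p MI to \p MIB as an
/// immediate operand.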
Volkan Kelesf7f25682018-01-16 18:44:05 +00004483void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4484 const MachineInstr &MI) const {
4485 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4486 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4487 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4488 assert(CstVal && "Expected constant value");
4489 MIB.addImm(CstVal.getValue());
4490}
4491
Daniel Sanders0b5293f2017-04-06 09:49:34 +00004492namespace llvm {
4493InstructionSelector *
4494createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4495 AArch64Subtarget &Subtarget,
4496 AArch64RegisterBankInfo &RBI) {
4497 return new AArch64InstructionSelector(TM, Subtarget, RBI);
4498}
4499}