//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB,
                          const MachineInstr &I) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB,
                          const MachineInstr &I) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

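  /// Whether non-flag-setting conditional branches (e.g. CBZ/TBZ) may be
  /// emitted; false when the function carries the SpeculativeLoadHardening
  /// attribute. Cached once per function in setupMF().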
  bool ProduceNonFlagSettingCondBr = false;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
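/// Returns the register class to use for a value of type \p Ty on register
/// bank \p RB, or nullptr if the combination is not handled. If
/// \p GetAllRegSet is true, the "all" variants of the GPR classes are used.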
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC == &AArch64::GPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are on the same register bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_GEP:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, Register SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

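/// Select a COPY; also used for generic bank-crossing operations (the
/// "bitcast or something" case below). Inserts a SUBREG_TO_REG or a
/// subregister copy when the source and destination differ in bank or size.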
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      // If we're doing a cross-bank copy on different-sized registers, we need
      // to do a bit more work.
      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

      if (SrcSize > DstSize) {
        // We're doing a cross-bank copy into a smaller register. We need a
        // subregister copy. First, get a register class that's on the same bank
        // as the destination, but the same size as the source.
        const TargetRegisterClass *SubregRC =
            getMinClassForRegBank(DstRegBank, SrcSize, true);
        assert(SubregRC && "Didn't get a register class for subreg?");

        // Get the appropriate subregister for the destination.
        unsigned SubReg = 0;
        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
          return false;
        }

        // Now, insert a subregister copy using the new register class.
        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
        return CheckCopy();
      }

      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
               SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
        Register PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

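/// Select the AArch64 opcode for a scalar integer <-> floating point
/// conversion \p GenericOpc (G_SITOFP, G_UITOFP, G_FPTOSI or G_FPTOUI) with
/// destination type \p DstTy and source type \p SrcTy.
/// \returns \p GenericOpc if the combination is unsupported.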
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

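/// Returns the opcode to use for a G_SELECT \p I: CSELWr/CSELXr when the
/// result is on the GPR bank, FCSELSrrr/FCSELDrrr when it is on the FPR bank,
/// or 0 if the result type is not handled.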
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

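/// Convert an integer comparison predicate \p P into the equivalent AArch64
/// condition code.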
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

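/// Convert a floating-point comparison predicate \p P into up to two AArch64
/// condition codes. \p CondCode2 is left as AArch64CC::AL unless the predicate
/// (FCMP_ONE, FCMP_UEQ) requires a second check.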
static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

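/// Select a G_BRCOND whose condition is fed by a G_ICMP (possibly through a
/// G_TRUNC): use CBZ/CBNZ when comparing a GPR against zero for equality,
/// otherwise fall back to an integer compare followed by a Bcc.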
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    MachineIRBuilder MIB(I);
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

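/// Select a vector G_SHL. Only <4 x s32> and <2 x s32> are handled here, by
/// emitting the corresponding USHL instruction.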
bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

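/// Select a vector G_ASHR for the <4 x s32> and <2 x s32> types.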
bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction, but the vector
  // shift-left-by-register instruction (SSHL) takes a signed shift amount,
  // where negative values shift right.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

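/// Lower G_VASTART for Darwin: compute the address of the first variadic
/// stack slot and store it into the va_list object.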
bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

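/// Materialize the full 64-bit address of the GlobalValue or BlockAddress
/// \p V with a MOVZ of the low 16 bits followed by MOVKs for the remaining
/// three 16-bit chunks, applying \p OpFlags to each operand.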
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

Amara Emersoncac11512019-07-03 01:49:06 +00001128void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
1129 MachineBasicBlock &MBB = *I.getParent();
1130 MachineFunction &MF = *MBB.getParent();
1131 MachineRegisterInfo &MRI = MF.getRegInfo();
1132
1133 switch (I.getOpcode()) {
1134 case TargetOpcode::G_SHL:
1135 case TargetOpcode::G_ASHR:
1136 case TargetOpcode::G_LSHR: {
1137 // These shifts are legalized to have 64 bit shift amounts because we want
1138 // to take advantage of the existing imported selection patterns that assume
1139 // the immediates are s64s. However, if the shifted type is 32 bits and for
1140 // some reason we receive input GMIR that has an s64 shift amount that's not
1141 // a G_CONSTANT, insert a truncate so that we can still select the s32
1142 // register-register variant.
Daniel Sanders5ae66e52019-08-12 22:40:53 +00001143 Register SrcReg = I.getOperand(1).getReg();
1144 Register ShiftReg = I.getOperand(2).getReg();
Amara Emersoncac11512019-07-03 01:49:06 +00001145 const LLT ShiftTy = MRI.getType(ShiftReg);
1146 const LLT SrcTy = MRI.getType(SrcReg);
1147 if (SrcTy.isVector())
1148 return;
1149 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1150 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1151 return;
1152 auto *AmtMI = MRI.getVRegDef(ShiftReg);
1153 assert(AmtMI && "could not find a vreg definition for shift amount");
1154 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1155 // Insert a subregister copy to implement a 64->32 trunc
1156 MachineIRBuilder MIB(I);
1157 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1158 .addReg(ShiftReg, 0, AArch64::sub_32);
1159 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1160 I.getOperand(2).setReg(Trunc.getReg(0));
1161 }
1162 return;
1163 }
Jessica Paquette41affad2019-07-20 01:55:35 +00001164 case TargetOpcode::G_STORE:
1165 contractCrossBankCopyIntoStore(I, MRI);
1166 return;
Amara Emersoncac11512019-07-03 01:49:06 +00001167 default:
1168 return;
1169 }
1170}
1171
1172bool AArch64InstructionSelector::earlySelectSHL(
1173 MachineInstr &I, MachineRegisterInfo &MRI) const {
1174 // We try to match the immediate variant of LSL, which is actually an alias
1175 // for a special case of UBFM. Otherwise, we fall back to the imported
1176 // selector which will match the register variant.
1177 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1178 const auto &MO = I.getOperand(2);
1179 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1180 if (!VRegAndVal)
1181 return false;
1182
1183 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1184 if (DstTy.isVector())
1185 return false;
1186 bool Is64Bit = DstTy.getSizeInBits() == 64;
1187 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1188 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1189 MachineIRBuilder MIB(I);
1190
1191 if (!Imm1Fn || !Imm2Fn)
1192 return false;
1193
1194 auto NewI =
1195 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1196 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1197
1198 for (auto &RenderFn : *Imm1Fn)
1199 RenderFn(NewI);
1200 for (auto &RenderFn : *Imm2Fn)
1201 RenderFn(NewI);
1202
1203 I.eraseFromParent();
1204 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1205}
1206
Jessica Paquette41affad2019-07-20 01:55:35 +00001207void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1208 MachineInstr &I, MachineRegisterInfo &MRI) const {
1209 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1210 // If we're storing a scalar, it doesn't matter what register bank that
1211 // scalar is on. All that matters is the size.
1212 //
1213 // So, if we see something like this (with a 32-bit scalar as an example):
1214 //
1215 // %x:gpr(s32) = ... something ...
1216 // %y:fpr(s32) = COPY %x:gpr(s32)
1217 // G_STORE %y:fpr(s32)
1218 //
1219 // We can fix this up into something like this:
1220 //
1221 // G_STORE %x:gpr(s32)
1222 //
1223 // And then continue the selection process normally.
1224 MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
1225 if (!Def)
1226 return;
1227 Register DefDstReg = Def->getOperand(0).getReg();
1228 LLT DefDstTy = MRI.getType(DefDstReg);
1229 Register StoreSrcReg = I.getOperand(0).getReg();
1230 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1231
1232 // If we get something strange like a physical register, then we shouldn't
1233 // go any further.
1234 if (!DefDstTy.isValid())
1235 return;
1236
1237 // Are the source and dst types the same size?
1238 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1239 return;
1240
1241 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1242 RBI.getRegBank(DefDstReg, MRI, TRI))
1243 return;
1244
1245 // We have a cross-bank copy, which is entering a store. Let's fold it.
1246 I.getOperand(0).setReg(DefDstReg);
1247}
1248
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001249bool AArch64InstructionSelector::earlySelectLoad(
1250 MachineInstr &I, MachineRegisterInfo &MRI) const {
1251 // Try to fold in shifts, etc into the addressing mode of a load.
1252 assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
1253
1254 // Don't handle atomic loads/stores yet.
1255 auto &MemOp = **I.memoperands_begin();
Philip Reames5c38ca32019-08-15 22:21:14 +00001256 if (MemOp.isAtomic()) {
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001257 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1258 return false;
1259 }
1260
1261 unsigned MemBytes = MemOp.getSize();
1262
1263 // Only support 64-bit loads for now.
1264 if (MemBytes != 8)
1265 return false;
1266
1267 Register DstReg = I.getOperand(0).getReg();
1268 const LLT DstTy = MRI.getType(DstReg);
1269 // Don't handle vectors.
1270 if (DstTy.isVector())
1271 return false;
1272
1273 unsigned DstSize = DstTy.getSizeInBits();
1274 // TODO: 32-bit destinations.
1275 if (DstSize != 64)
1276 return false;
1277
Jessica Paquette2b404d02019-07-23 16:09:42 +00001278 // Check if we can do any folding from GEPs/shifts etc. into the load.
1279 auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001280 if (!ImmFn)
1281 return false;
1282
1283 // We can fold something. Emit the load here.
1284 MachineIRBuilder MIB(I);
1285
1286 // Choose the instruction based on the size of the element being loaded, and
1287 // whether or not we're loading into an FPR.
1288 const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
1289 unsigned Opc =
1290 RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
1291 // Construct the load.
1292 auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
1293 for (auto &RenderFn : *ImmFn)
1294 RenderFn(LoadMI);
1295 LoadMI.addMemOperand(*I.memoperands_begin());
1296 I.eraseFromParent();
1297 return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
1298}
1299
Amara Emersoncac11512019-07-03 01:49:06 +00001300bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1301 assert(I.getParent() && "Instruction should be in a basic block!");
1302 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1303
1304 MachineBasicBlock &MBB = *I.getParent();
1305 MachineFunction &MF = *MBB.getParent();
1306 MachineRegisterInfo &MRI = MF.getRegInfo();
1307
1308 switch (I.getOpcode()) {
1309 case TargetOpcode::G_SHL:
1310 return earlySelectSHL(I, MRI);
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00001311 case TargetOpcode::G_LOAD:
1312 return earlySelectLoad(I, MRI);
Tim Northoverde98e922019-08-06 09:18:41 +00001313 case TargetOpcode::G_CONSTANT: {
1314 bool IsZero = false;
1315 if (I.getOperand(1).isCImm())
1316 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1317 else if (I.getOperand(1).isImm())
1318 IsZero = I.getOperand(1).getImm() == 0;
1319
1320 if (!IsZero)
1321 return false;
1322
1323 Register DefReg = I.getOperand(0).getReg();
1324 LLT Ty = MRI.getType(DefReg);
Tim Northoverb5abc422019-08-06 13:34:08 +00001325 if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
1326 return false;
Tim Northoverde98e922019-08-06 09:18:41 +00001327
1328 if (Ty == LLT::scalar(64)) {
1329 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1330 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1331 } else {
1332 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1333 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1334 }
1335 I.setDesc(TII.get(TargetOpcode::COPY));
1336 return true;
1337 }
Amara Emersoncac11512019-07-03 01:49:06 +00001338 default:
1339 return false;
1340 }
1341}
1342
Amara Emersone14c91b2019-08-13 06:26:59 +00001343bool AArch64InstructionSelector::select(MachineInstr &I) {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001344 assert(I.getParent() && "Instruction should be in a basic block!");
1345 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1346
1347 MachineBasicBlock &MBB = *I.getParent();
1348 MachineFunction &MF = *MBB.getParent();
1349 MachineRegisterInfo &MRI = MF.getRegInfo();
1350
Tim Northovercdf23f12016-10-31 18:30:59 +00001351 unsigned Opcode = I.getOpcode();
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001352 // G_PHI requires the same handling as PHI
1353 if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
Tim Northovercdf23f12016-10-31 18:30:59 +00001354 // Certain non-generic instructions also need some special handling.
1355
1356 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1357 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001358
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001359 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001360 const Register DefReg = I.getOperand(0).getReg();
Tim Northover7d88da62016-11-08 00:34:06 +00001361 const LLT DefTy = MRI.getType(DefReg);
1362
Matt Arsenault732149b2019-07-01 17:02:24 +00001363 const RegClassOrRegBank &RegClassOrBank =
1364 MRI.getRegClassOrRegBank(DefReg);
Tim Northover7d88da62016-11-08 00:34:06 +00001365
Matt Arsenault732149b2019-07-01 17:02:24 +00001366 const TargetRegisterClass *DefRC
1367 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1368 if (!DefRC) {
1369 if (!DefTy.isValid()) {
1370 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1371 return false;
1372 }
1373 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1374 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001375 if (!DefRC) {
Matt Arsenault732149b2019-07-01 17:02:24 +00001376 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1377 return false;
Tim Northover7d88da62016-11-08 00:34:06 +00001378 }
1379 }
Matt Arsenault732149b2019-07-01 17:02:24 +00001380
Aditya Nandakumarefd8a842017-08-23 20:45:48 +00001381 I.setDesc(TII.get(TargetOpcode::PHI));
Tim Northover7d88da62016-11-08 00:34:06 +00001382
1383 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1384 }
1385
1386 if (I.isCopy())
Tim Northovercdf23f12016-10-31 18:30:59 +00001387 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover7d88da62016-11-08 00:34:06 +00001388
1389 return true;
Tim Northovercdf23f12016-10-31 18:30:59 +00001390 }
1391
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001392
1393 if (I.getNumOperands() != I.getNumExplicitOperands()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001394 LLVM_DEBUG(
1395 dbgs() << "Generic instruction has unexpected implicit operands\n");
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001396 return false;
1397 }
1398
Amara Emersoncac11512019-07-03 01:49:06 +00001399 // Try to do some lowering before we start instruction selecting. These
1400 // lowerings are purely transformations on the input G_MIR and so selection
1401 // must continue after any modification of the instruction.
1402 preISelLower(I);
1403
1404 // There may be patterns that the importer can't handle optimally, but that
1405 // it still selects to a suboptimal sequence, so our custom C++ selection
1406 // code later never gets a chance to work on them. Therefore, we have an early
1407 // selection attempt here to give priority to certain selection routines
1408 // over the imported ones.
1409 if (earlySelect(I))
1410 return true;
1411
Amara Emersone14c91b2019-08-13 06:26:59 +00001412 if (selectImpl(I, *CoverageInfo))
Ahmed Bougacha36f70352016-12-21 23:26:20 +00001413 return true;
1414
Tim Northover32a078a2016-09-15 10:09:59 +00001415 LLT Ty =
1416 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001417
Amara Emerson3739a202019-03-15 21:59:50 +00001418 MachineIRBuilder MIB(I);
1419
Tim Northover69271c62016-10-12 22:49:11 +00001420 switch (Opcode) {
Tim Northover5e3dbf32016-10-12 22:49:01 +00001421 case TargetOpcode::G_BRCOND: {
1422 if (Ty.getSizeInBits() > 32) {
1423 // We shouldn't need this on AArch64, but it would be implemented as an
1424 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1425 // bit being tested is < 32.
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001426 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1427 << ", expected at most 32-bits");
Tim Northover5e3dbf32016-10-12 22:49:01 +00001428 return false;
1429 }
1430
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001431 const Register CondReg = I.getOperand(0).getReg();
Tim Northover5e3dbf32016-10-12 22:49:01 +00001432 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1433
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001434 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1435 // instructions will not be produced, as they are conditional branch
1436 // instructions that do not set flags.
1437 bool ProduceNonFlagSettingCondBr =
1438 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
1439 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
Ahmed Bougacha641cb202017-03-27 16:35:31 +00001440 return true;
1441
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001442 if (ProduceNonFlagSettingCondBr) {
1443 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1444 .addUse(CondReg)
1445 .addImm(/*bit offset=*/0)
1446 .addMBB(DestMBB);
Tim Northover5e3dbf32016-10-12 22:49:01 +00001447
Kristof Beylse66bc1f2018-12-18 08:50:02 +00001448 I.eraseFromParent();
1449 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1450 } else {
1451 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1452 .addDef(AArch64::WZR)
1453 .addUse(CondReg)
1454 .addImm(1);
1455 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1456 auto Bcc =
1457 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1458 .addImm(AArch64CC::EQ)
1459 .addMBB(DestMBB);
1460
1461 I.eraseFromParent();
1462 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1463 }
Tim Northover5e3dbf32016-10-12 22:49:01 +00001464 }
1465
Kristof Beyls65a12c02017-01-30 09:13:18 +00001466 case TargetOpcode::G_BRINDIRECT: {
1467 I.setDesc(TII.get(AArch64::BR));
1468 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1469 }
1470
Amara Emerson6e71b342019-06-21 18:10:41 +00001471 case TargetOpcode::G_BRJT:
1472 return selectBrJT(I, MRI);
1473
Jessica Paquette67ab9eb2019-04-26 18:00:01 +00001474 case TargetOpcode::G_BSWAP: {
1475 // Handle vector types for G_BSWAP directly.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001476 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette67ab9eb2019-04-26 18:00:01 +00001477 LLT DstTy = MRI.getType(DstReg);
1478
1479 // We should only get vector types here; everything else is handled by the
1480 // importer right now.
1481 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1482 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1483 return false;
1484 }
1485
1486 // Only handle 4 and 2 element vectors for now.
1487 // TODO: 16-bit elements.
1488 unsigned NumElts = DstTy.getNumElements();
1489 if (NumElts != 4 && NumElts != 2) {
1490 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1491 return false;
1492 }
1493
1494 // Choose the correct opcode for the supported types. Right now, that's
1495 // v2s32, v4s32, and v2s64.
1496 unsigned Opc = 0;
1497 unsigned EltSize = DstTy.getElementType().getSizeInBits();
1498 if (EltSize == 32)
1499 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1500 : AArch64::REV32v16i8;
1501 else if (EltSize == 64)
1502 Opc = AArch64::REV64v16i8;
1503
1504 // We should always get something by the time we get here...
1505 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1506
1507 I.setDesc(TII.get(Opc));
1508 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1509 }
1510
Tim Northover4494d692016-10-18 19:47:57 +00001511 case TargetOpcode::G_FCONSTANT:
Tim Northover4edc60d2016-10-10 21:49:42 +00001512 case TargetOpcode::G_CONSTANT: {
Tim Northover4494d692016-10-18 19:47:57 +00001513 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1514
Amara Emerson8f25a022019-06-21 16:43:50 +00001515 const LLT s8 = LLT::scalar(8);
1516 const LLT s16 = LLT::scalar(16);
Tim Northover4494d692016-10-18 19:47:57 +00001517 const LLT s32 = LLT::scalar(32);
1518 const LLT s64 = LLT::scalar(64);
1519 const LLT p0 = LLT::pointer(0, 64);
1520
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001521 const Register DefReg = I.getOperand(0).getReg();
Tim Northover4494d692016-10-18 19:47:57 +00001522 const LLT DefTy = MRI.getType(DefReg);
1523 const unsigned DefSize = DefTy.getSizeInBits();
1524 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1525
1526 // FIXME: Redundant check, but even less readable when factored out.
1527 if (isFP) {
1528 if (Ty != s32 && Ty != s64) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001529 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1530 << " constant, expected: " << s32 << " or " << s64
1531 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001532 return false;
1533 }
1534
1535 if (RB.getID() != AArch64::FPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001536 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
1537 << " constant on bank: " << RB
1538 << ", expected: FPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001539 return false;
1540 }
Daniel Sanders11300ce2017-10-13 21:28:03 +00001541
1542 // The case when we have 0.0 is covered by tablegen. Reject it here so we
1543 // can be sure tablegen works correctly and isn't rescued by this code.
1544 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
1545 return false;
Tim Northover4494d692016-10-18 19:47:57 +00001546 } else {
Daniel Sanders05540042017-08-08 10:44:31 +00001547 // s32 and s64 are covered by tablegen.
Amara Emerson8f25a022019-06-21 16:43:50 +00001548 if (Ty != p0 && Ty != s8 && Ty != s16) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001549 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1550 << " constant, expected: " << s32 << ", " << s64
1551 << ", or " << p0 << '\n');
Tim Northover4494d692016-10-18 19:47:57 +00001552 return false;
1553 }
1554
1555 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001556 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
1557 << " constant on bank: " << RB
1558 << ", expected: GPR\n");
Tim Northover4494d692016-10-18 19:47:57 +00001559 return false;
1560 }
1561 }
1562
Amara Emerson8f25a022019-06-21 16:43:50 +00001563 // We allow G_CONSTANT of types < 32b.
Tim Northover4494d692016-10-18 19:47:57 +00001564 const unsigned MovOpc =
Amara Emerson8f25a022019-06-21 16:43:50 +00001565 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
Tim Northover4494d692016-10-18 19:47:57 +00001566
Tim Northover4494d692016-10-18 19:47:57 +00001567 if (isFP) {
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001568 // Either emit an FMOV, or emit a normal mov into a GPR and copy it to the FPR.
Tim Northover4494d692016-10-18 19:47:57 +00001569 const TargetRegisterClass &GPRRC =
1570 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
1571 const TargetRegisterClass &FPRRC =
1572 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
1573
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001574 // Can we use a FMOV instruction to represent the immediate?
1575 if (emitFMovForFConstant(I, MRI))
1576 return true;
1577
1578 // Nope. Emit a copy and use a normal mov instead.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001579 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
Tim Northover4494d692016-10-18 19:47:57 +00001580 MachineOperand &RegOp = I.getOperand(0);
1581 RegOp.setReg(DefGPRReg);
Amara Emerson3739a202019-03-15 21:59:50 +00001582 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1583 MIB.buildCopy({DefReg}, {DefGPRReg});
Tim Northover4494d692016-10-18 19:47:57 +00001584
1585 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001586 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
Tim Northover4494d692016-10-18 19:47:57 +00001587 return false;
1588 }
1589
1590 MachineOperand &ImmOp = I.getOperand(1);
1591 // FIXME: Is going through int64_t always correct?
1592 ImmOp.ChangeToImmediate(
1593 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001594 } else if (I.getOperand(1).isCImm()) {
Tim Northover9267ac52016-12-05 21:47:07 +00001595 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
1596 I.getOperand(1).ChangeToImmediate(Val);
Daniel Sanders066ebbf2017-02-24 15:43:30 +00001597 } else if (I.getOperand(1).isImm()) {
1598 uint64_t Val = I.getOperand(1).getImm();
1599 I.getOperand(1).ChangeToImmediate(Val);
Tim Northover4494d692016-10-18 19:47:57 +00001600 }
1601
Jessica Paquettea3843fe2019-05-01 22:39:43 +00001602 I.setDesc(TII.get(MovOpc));
Tim Northover4494d692016-10-18 19:47:57 +00001603 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1604 return true;
Tim Northover4edc60d2016-10-10 21:49:42 +00001605 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001606 case TargetOpcode::G_EXTRACT: {
Amara Emerson511f7f52019-07-23 22:05:13 +00001607 Register DstReg = I.getOperand(0).getReg();
1608 Register SrcReg = I.getOperand(1).getReg();
1609 LLT SrcTy = MRI.getType(SrcReg);
1610 LLT DstTy = MRI.getType(DstReg);
Amara Emerson242efdb2018-02-18 17:28:34 +00001611 (void)DstTy;
Amara Emersonbc03bae2018-02-18 17:03:02 +00001612 unsigned SrcSize = SrcTy.getSizeInBits();
Amara Emerson511f7f52019-07-23 22:05:13 +00001613
1614 if (SrcTy.getSizeInBits() > 64) {
1615 // This should be an extract of an s128, which is like a vector extract.
1616 if (SrcTy.getSizeInBits() != 128)
1617 return false;
1618 // Only support extracting 64 bits from an s128 at the moment.
1619 if (DstTy.getSizeInBits() != 64)
1620 return false;
1621
1622 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1623 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1624 // Check we have the right regbank always.
1625 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
1626 DstRB.getID() == AArch64::FPRRegBankID &&
1627 "Wrong extract regbank!");
Fangrui Song305ace72019-07-24 01:59:44 +00001628 (void)SrcRB;
Amara Emerson511f7f52019-07-23 22:05:13 +00001629
1630 // Emit the same code as a vector extract.
1631 // Offset must be a multiple of 64.
1632 unsigned Offset = I.getOperand(2).getImm();
1633 if (Offset % 64 != 0)
1634 return false;
1635 unsigned LaneIdx = Offset / 64;
1636 MachineIRBuilder MIB(I);
1637 MachineInstr *Extract = emitExtractVectorElt(
1638 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
1639 if (!Extract)
1640 return false;
1641 I.eraseFromParent();
1642 return true;
1643 }
Tim Northover7b6d66c2017-07-20 22:58:38 +00001644
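    // A scalar G_EXTRACT at bit Off of width W maps onto an unsigned bitfield
    // extract, roughly UBFM dst, src, #Off, #(Off + W - 1); that second
    // immediate is what gets appended below.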
Amara Emersonbc03bae2018-02-18 17:03:02 +00001645 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001646 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
1647 Ty.getSizeInBits() - 1);
1648
Amara Emersonbc03bae2018-02-18 17:03:02 +00001649 if (SrcSize < 64) {
1650 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
1651 "unexpected G_EXTRACT types");
1652 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1653 }
1654
Amara Emerson511f7f52019-07-23 22:05:13 +00001655 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Amara Emerson3739a202019-03-15 21:59:50 +00001656 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
Amara Emerson86271782019-03-18 19:20:10 +00001657 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
1658 .addReg(DstReg, 0, AArch64::sub_32);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001659 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
1660 AArch64::GPR32RegClass, MRI);
1661 I.getOperand(0).setReg(DstReg);
1662
1663 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1664 }
1665
1666 case TargetOpcode::G_INSERT: {
1667 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
Amara Emersonbc03bae2018-02-18 17:03:02 +00001668 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1669 unsigned DstSize = DstTy.getSizeInBits();
Tim Northover7b6d66c2017-07-20 22:58:38 +00001670 // Larger inserts are vectors, same-size ones should be something else by
1671 // now (split up or turned into COPYs).
1672 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
1673 return false;
1674
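    // A G_INSERT of Width bits at bit LSB becomes a bitfield move, roughly
    // BFM dst, src, #((RegSize - LSB) % RegSize), #(Width - 1); the two
    // immediates are rewritten to that form below.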
Amara Emersonbc03bae2018-02-18 17:03:02 +00001675 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001676 unsigned LSB = I.getOperand(3).getImm();
1677 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
Amara Emersonbc03bae2018-02-18 17:03:02 +00001678 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
Tim Northover7b6d66c2017-07-20 22:58:38 +00001679 MachineInstrBuilder(MF, I).addImm(Width - 1);
1680
Amara Emersonbc03bae2018-02-18 17:03:02 +00001681 if (DstSize < 64) {
1682 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
1683 "unexpected G_INSERT types");
1684 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1685 }
1686
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001687 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
Tim Northover7b6d66c2017-07-20 22:58:38 +00001688 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
1689 TII.get(AArch64::SUBREG_TO_REG))
1690 .addDef(SrcReg)
1691 .addImm(0)
1692 .addUse(I.getOperand(2).getReg())
1693 .addImm(AArch64::sub_32);
1694 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
1695 AArch64::GPR32RegClass, MRI);
1696 I.getOperand(2).setReg(SrcReg);
1697
1698 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1699 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001700 case TargetOpcode::G_FRAME_INDEX: {
1701 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
Tim Northover5ae83502016-09-15 09:20:34 +00001702 if (Ty != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001703 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
1704 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001705 return false;
1706 }
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001707 I.setDesc(TII.get(AArch64::ADDXri));
Ahmed Bougacha0306b5e2016-08-16 14:02:42 +00001708
1709 // MOs for a #0 shifted immediate.
1710 I.addOperand(MachineOperand::CreateImm(0));
1711 I.addOperand(MachineOperand::CreateImm(0));
1712
1713 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1714 }
Tim Northoverbdf16242016-10-10 21:50:00 +00001715
1716 case TargetOpcode::G_GLOBAL_VALUE: {
1717 auto GV = I.getOperand(1).getGlobal();
Tim Northover01eb8692019-08-09 09:32:38 +00001718 if (GV->isThreadLocal())
1719 return selectTLSGlobalValue(I, MRI);
1720
Peter Collingbourne33773d52019-07-31 20:14:09 +00001721 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
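    // Roughly: GOT-referenced globals use LOADgot, the large code model uses
    // a movz/movk sequence, the tiny code model uses a single adr, and
    // everything else gets the usual adrp + add pair via MOVaddr.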
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001722 if (OpFlags & AArch64II::MO_GOT) {
Tim Northoverbdf16242016-10-10 21:50:00 +00001723 I.setDesc(TII.get(AArch64::LOADgot));
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001724 I.getOperand(1).setTargetFlags(OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001725 } else if (TM.getCodeModel() == CodeModel::Large) {
1726 // Materialize the global using movz/movk instructions.
Amara Emerson1e8c1642018-07-31 00:09:02 +00001727 materializeLargeCMVal(I, GV, OpFlags);
Amara Emersond5785772018-01-18 19:21:27 +00001728 I.eraseFromParent();
1729 return true;
David Green9dd1d452018-08-22 11:31:39 +00001730 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1731 I.setDesc(TII.get(AArch64::ADR));
1732 I.getOperand(1).setTargetFlags(OpFlags);
Tim Northoverfe7c59a2016-12-13 18:25:38 +00001733 } else {
Tim Northoverbdf16242016-10-10 21:50:00 +00001734 I.setDesc(TII.get(AArch64::MOVaddr));
1735 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
1736 MachineInstrBuilder MIB(MF, I);
1737 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
1738 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1739 }
1740 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1741 }
1742
Amara Emersond3144a42019-06-06 07:58:37 +00001743 case TargetOpcode::G_ZEXTLOAD:
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001744 case TargetOpcode::G_LOAD:
1745 case TargetOpcode::G_STORE: {
Amara Emersond3144a42019-06-06 07:58:37 +00001746 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
1747 MachineIRBuilder MIB(I);
1748
Tim Northover0f140c72016-09-09 11:46:34 +00001749 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001750
Tim Northover5ae83502016-09-15 09:20:34 +00001751 if (PtrTy != LLT::pointer(0, 64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001752 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
1753 << ", expected: " << LLT::pointer(0, 64) << '\n');
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001754 return false;
1755 }
1756
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001757 auto &MemOp = **I.memoperands_begin();
Philip Reames5c38ca32019-08-15 22:21:14 +00001758 if (MemOp.isAtomic()) {
Amara Emerson1222cfd2019-08-14 21:30:30 +00001759 // For now we just support s8 acquire loads to be able to compile stack
1760 // protector code.
1761 if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
1762 MemOp.getSize() == 1) {
1763 I.setDesc(TII.get(AArch64::LDARB));
1764 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1765 }
1766 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001767 return false;
1768 }
Daniel Sandersf84bc372018-05-05 20:53:24 +00001769 unsigned MemSizeInBits = MemOp.getSize() * 8;
Daniel Sanders3c1c4c02017-12-05 05:52:07 +00001770
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001771 const Register PtrReg = I.getOperand(1).getReg();
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001772#ifndef NDEBUG
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001773 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
Ahmed Bougachaf0b22c42017-03-27 18:14:20 +00001774 // Sanity-check the pointer register.
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001775 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
1776 "Load/Store pointer operand isn't a GPR");
Tim Northover0f140c72016-09-09 11:46:34 +00001777 assert(MRI.getType(PtrReg).isPointer() &&
1778 "Load/Store pointer operand isn't a pointer");
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001779#endif
1780
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001781 const Register ValReg = I.getOperand(0).getReg();
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001782 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
1783
1784 const unsigned NewOpc =
Daniel Sandersf84bc372018-05-05 20:53:24 +00001785 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001786 if (NewOpc == I.getOpcode())
1787 return false;
1788
1789 I.setDesc(TII.get(NewOpc));
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001790
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001791 uint64_t Offset = 0;
1792 auto *PtrMI = MRI.getVRegDef(PtrReg);
1793
1794 // Try to fold a GEP into our unsigned immediate addressing mode.
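    // e.g. (illustrative): for "%p2 = G_GEP %p, 16" feeding a "load 8", the
    // constant 16 is scaled by the access size and becomes the unsigned
    // immediate of an LDRXui, i.e. roughly "ldr x0, [xP, #16]".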
1795 if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
1796 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
1797 int64_t Imm = *COff;
Daniel Sandersf84bc372018-05-05 20:53:24 +00001798 const unsigned Size = MemSizeInBits / 8;
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001799 const unsigned Scale = Log2_32(Size);
1800 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
Daniel Sanders5ae66e52019-08-12 22:40:53 +00001801 Register Ptr2Reg = PtrMI->getOperand(1).getReg();
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001802 I.getOperand(1).setReg(Ptr2Reg);
1803 PtrMI = MRI.getVRegDef(Ptr2Reg);
1804 Offset = Imm / Size;
1805 }
1806 }
1807 }
1808
Ahmed Bougachaf75782f2017-03-27 17:31:56 +00001809 // If we haven't folded anything into our addressing mode yet, try to fold
1810 // a frame index into the base+offset.
1811 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1812 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
1813
Ahmed Bougacha8a654082017-03-27 17:31:52 +00001814 I.addOperand(MachineOperand::CreateImm(Offset));
Ahmed Bougacha85a66a62017-03-27 17:31:48 +00001815
1816 // If we're storing a 0, use WZR/XZR.
1817 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
1818 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
1819 if (I.getOpcode() == AArch64::STRWui)
1820 I.getOperand(0).setReg(AArch64::WZR);
1821 else if (I.getOpcode() == AArch64::STRXui)
1822 I.getOperand(0).setReg(AArch64::XZR);
1823 }
1824 }
1825
Amara Emersond3144a42019-06-06 07:58:37 +00001826 if (IsZExtLoad) {
1827 // The zextload from a smaller type to i32 should be handled by the importer.
1828 if (MRI.getType(ValReg).getSizeInBits() != 64)
1829 return false;
1830 // If we have a ZEXTLOAD then change the load's type to be a narrower reg
1831 // and zero-extend with SUBREG_TO_REG.
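    // A rough sketch of the result for a 32-to-64-bit zextload:
    //   %ld:gpr32 = LDRWui %ptr, 0   ; a W-register load zeroes the top bits
    //   %dst:gpr64all = SUBREG_TO_REG 0, %ld, sub_32
    // (register names illustrative only).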
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001832 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
1833 Register DstReg = I.getOperand(0).getReg();
Amara Emersond3144a42019-06-06 07:58:37 +00001834 I.getOperand(0).setReg(LdReg);
1835
1836 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
1837 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
1838 .addImm(0)
1839 .addUse(LdReg)
1840 .addImm(AArch64::sub_32);
1841 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1842 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
1843 MRI);
1844 }
Ahmed Bougacha7adfac52016-07-29 16:56:16 +00001845 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1846 }
1847
Tim Northover9dd78f82017-02-08 21:22:25 +00001848 case TargetOpcode::G_SMULH:
1849 case TargetOpcode::G_UMULH: {
1850 // Reject the various things we don't support yet.
1851 if (unsupportedBinOp(I, RBI, MRI, TRI))
1852 return false;
1853
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001854 const Register DefReg = I.getOperand(0).getReg();
Tim Northover9dd78f82017-02-08 21:22:25 +00001855 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1856
1857 if (RB.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001858 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
Tim Northover9dd78f82017-02-08 21:22:25 +00001859 return false;
1860 }
1861
1862 if (Ty != LLT::scalar(64)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001863 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
1864 << ", expected: " << LLT::scalar(64) << '\n');
Tim Northover9dd78f82017-02-08 21:22:25 +00001865 return false;
1866 }
1867
1868 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
1869 : AArch64::UMULHrr;
1870 I.setDesc(TII.get(NewOpc));
1871
1872 // Now that we selected an opcode, we need to constrain the register
1873 // operands to use appropriate classes.
1874 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1875 }
Ahmed Bougacha33e19fe2016-08-18 16:05:11 +00001876 case TargetOpcode::G_FADD:
1877 case TargetOpcode::G_FSUB:
1878 case TargetOpcode::G_FMUL:
1879 case TargetOpcode::G_FDIV:
1880
Ahmed Bougacha2ac5bf92016-08-16 14:02:47 +00001881 case TargetOpcode::G_ASHR:
Amara Emerson9bf092d2019-04-09 21:22:43 +00001882 if (MRI.getType(I.getOperand(0).getReg()).isVector())
1883 return selectVectorASHR(I, MRI);
1884 LLVM_FALLTHROUGH;
1885 case TargetOpcode::G_SHL:
1886 if (Opcode == TargetOpcode::G_SHL &&
1887 MRI.getType(I.getOperand(0).getReg()).isVector())
1888 return selectVectorSHL(I, MRI);
1889 LLVM_FALLTHROUGH;
1890 case TargetOpcode::G_OR:
Jessica Paquette728b18f2019-07-24 23:11:01 +00001891 case TargetOpcode::G_LSHR: {
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001892 // Reject the various things we don't support yet.
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001893 if (unsupportedBinOp(I, RBI, MRI, TRI))
1894 return false;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001895
Ahmed Bougacha59e160a2016-08-16 14:37:40 +00001896 const unsigned OpSize = Ty.getSizeInBits();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001897
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001898 const Register DefReg = I.getOperand(0).getReg();
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001899 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
1900
1901 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
1902 if (NewOpc == I.getOpcode())
1903 return false;
1904
1905 I.setDesc(TII.get(NewOpc));
1906 // FIXME: Should the type be always reset in setDesc?
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00001907
1908 // Now that we selected an opcode, we need to constrain the register
1909 // operands to use appropriate classes.
1910 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1911 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00001912
Jessica Paquette728b18f2019-07-24 23:11:01 +00001913 case TargetOpcode::G_GEP: {
1914 MachineIRBuilder MIRBuilder(I);
1915 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
1916 MIRBuilder);
1917 I.eraseFromParent();
1918 return true;
1919 }
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001920 case TargetOpcode::G_UADDO: {
1921 // TODO: Support other types.
1922 unsigned OpSize = Ty.getSizeInBits();
1923 if (OpSize != 32 && OpSize != 64) {
1924 LLVM_DEBUG(
1925 dbgs()
1926 << "G_UADDO currently only supported for 32 and 64 b types.\n");
1927 return false;
1928 }
1929
1930 // TODO: Support vectors.
1931 if (Ty.isVector()) {
1932 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
1933 return false;
1934 }
1935
1936 // Add and set the condition flags.
1937 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
1938 MachineIRBuilder MIRBuilder(I);
1939 auto AddsMI = MIRBuilder.buildInstr(
1940 AddsOpc, {I.getOperand(0).getReg()},
1941 {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
1942 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
1943
1944 // Now, put the overflow result in the register given by the first operand
1945 // to the G_UADDO. CSINC increments the result when the predicate is false,
1946 // so to get the increment when it's true, we need to use the inverse. In
1947 // this case, we want to increment when carry is set.
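    // In other words, the CSINC below is effectively materializing
    // "cset w<carry>, hs" (cset being an alias of CSINC with WZR operands and
    // the inverted condition).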
1948 auto CsetMI = MIRBuilder
1949 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001950 {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette7d6784f2019-03-14 22:54:29 +00001951 .addImm(getInvertedCondCode(AArch64CC::HS));
1952 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
1953 I.eraseFromParent();
1954 return true;
1955 }
1956
Tim Northover398c5f52017-02-14 20:56:29 +00001957 case TargetOpcode::G_PTR_MASK: {
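    // e.g. G_PTR_MASK %ptr, 4 clears the low 4 bits, i.e. roughly
    // "and x0, x1, #0xfffffffffffffff0" once the mask below is encoded as a
    // logical immediate.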
1958 uint64_t Align = I.getOperand(2).getImm();
1959 if (Align >= 64 || Align == 0)
1960 return false;
1961
1962 uint64_t Mask = ~((1ULL << Align) - 1);
1963 I.setDesc(TII.get(AArch64::ANDXri));
1964 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
1965
1966 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1967 }
Tim Northover037af52c2016-10-31 18:31:09 +00001968 case TargetOpcode::G_PTRTOINT:
Tim Northoverfb8d9892016-10-12 22:49:15 +00001969 case TargetOpcode::G_TRUNC: {
1970 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1971 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
1972
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00001973 const Register DstReg = I.getOperand(0).getReg();
1974 const Register SrcReg = I.getOperand(1).getReg();
Tim Northoverfb8d9892016-10-12 22:49:15 +00001975
1976 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1977 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1978
1979 if (DstRB.getID() != SrcRB.getID()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001980 LLVM_DEBUG(
1981 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001982 return false;
1983 }
1984
1985 if (DstRB.getID() == AArch64::GPRRegBankID) {
1986 const TargetRegisterClass *DstRC =
1987 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
1988 if (!DstRC)
1989 return false;
1990
1991 const TargetRegisterClass *SrcRC =
1992 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
1993 if (!SrcRC)
1994 return false;
1995
1996 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
1997 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00001998 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00001999 return false;
2000 }
2001
2002 if (DstRC == SrcRC) {
2003 // Nothing to be done
Daniel Sanderscc36dbf2017-06-27 10:11:39 +00002004 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2005 SrcTy == LLT::scalar(64)) {
2006 llvm_unreachable("TableGen can import this case");
2007 return false;
Tim Northoverfb8d9892016-10-12 22:49:15 +00002008 } else if (DstRC == &AArch64::GPR32RegClass &&
2009 SrcRC == &AArch64::GPR64RegClass) {
2010 I.getOperand(1).setSubReg(AArch64::sub_32);
2011 } else {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002012 LLVM_DEBUG(
2013 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
Tim Northoverfb8d9892016-10-12 22:49:15 +00002014 return false;
2015 }
2016
2017 I.setDesc(TII.get(TargetOpcode::COPY));
2018 return true;
2019 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2020 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2021 I.setDesc(TII.get(AArch64::XTNv4i16));
2022 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2023 return true;
2024 }
Amara Emerson511f7f52019-07-23 22:05:13 +00002025
2026 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2027 MachineIRBuilder MIB(I);
2028 MachineInstr *Extract = emitExtractVectorElt(
2029 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2030 if (!Extract)
2031 return false;
2032 I.eraseFromParent();
2033 return true;
2034 }
Tim Northoverfb8d9892016-10-12 22:49:15 +00002035 }
2036
2037 return false;
2038 }
2039
Tim Northover3d38b3a2016-10-11 20:50:21 +00002040 case TargetOpcode::G_ANYEXT: {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002041 const Register DstReg = I.getOperand(0).getReg();
2042 const Register SrcReg = I.getOperand(1).getReg();
Tim Northover3d38b3a2016-10-11 20:50:21 +00002043
Quentin Colombetcb629a82016-10-12 03:57:49 +00002044 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2045 if (RBDst.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002046 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2047 << ", expected: GPR\n");
Quentin Colombetcb629a82016-10-12 03:57:49 +00002048 return false;
2049 }
Tim Northover3d38b3a2016-10-11 20:50:21 +00002050
Quentin Colombetcb629a82016-10-12 03:57:49 +00002051 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2052 if (RBSrc.getID() != AArch64::GPRRegBankID) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002053 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2054 << ", expected: GPR\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002055 return false;
2056 }
2057
2058 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2059
2060 if (DstSize == 0) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002061 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002062 return false;
2063 }
2064
Quentin Colombetcb629a82016-10-12 03:57:49 +00002065 if (DstSize != 64 && DstSize > 32) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002066 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2067 << ", expected: 32 or 64\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002068 return false;
2069 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00002070 // At this point G_ANYEXT is just like a plain COPY, but we need
2071 // to explicitly form the 64-bit value if any.
2072 if (DstSize > 32) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002073 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
Quentin Colombetcb629a82016-10-12 03:57:49 +00002074 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2075 .addDef(ExtSrc)
2076 .addImm(0)
2077 .addUse(SrcReg)
2078 .addImm(AArch64::sub_32);
2079 I.getOperand(1).setReg(ExtSrc);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002080 }
Quentin Colombetcb629a82016-10-12 03:57:49 +00002081 return selectCopy(I, TII, MRI, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002082 }
2083
2084 case TargetOpcode::G_ZEXT:
2085 case TargetOpcode::G_SEXT: {
2086 unsigned Opcode = I.getOpcode();
Amara Emersonc07fe302019-07-26 00:01:09 +00002087 const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002088 const Register DefReg = I.getOperand(0).getReg();
2089 const Register SrcReg = I.getOperand(1).getReg();
Amara Emersonc07fe302019-07-26 00:01:09 +00002090 const LLT DstTy = MRI.getType(DefReg);
2091 const LLT SrcTy = MRI.getType(SrcReg);
2092 unsigned DstSize = DstTy.getSizeInBits();
2093 unsigned SrcSize = SrcTy.getSizeInBits();
Tim Northover3d38b3a2016-10-11 20:50:21 +00002094
Amara Emersonc07fe302019-07-26 00:01:09 +00002095 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2096 AArch64::GPRRegBankID &&
2097 "Unexpected ext regbank");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002098
Amara Emersonc07fe302019-07-26 00:01:09 +00002099 MachineIRBuilder MIB(I);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002100 MachineInstr *ExtI;
Amara Emersonc07fe302019-07-26 00:01:09 +00002101 if (DstTy.isVector())
2102 return false; // Should be handled by imported patterns.
2103
Amara Emerson73752ab2019-08-02 21:15:36 +00002104 // First, check whether we're extending the result of a load whose dest type
2105 // is smaller than 32 bits; if so, this zext is redundant. GPR32 is the
2106 // smallest GPR register on AArch64, and all smaller loads automatically
2107 // zero-extend the upper bits. E.g.
2108 // %v(s8) = G_LOAD %p, :: (load 1)
2109 // %v2(s32) = G_ZEXT %v(s8)
2110 if (!IsSigned) {
2111 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2112 if (LoadMI &&
2113 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
2114 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2115 unsigned BytesLoaded = MemOp->getSize();
2116 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2117 return selectCopy(I, TII, MRI, TRI, RBI);
2118 }
2119 }
2120
Amara Emersonc07fe302019-07-26 00:01:09 +00002121 if (DstSize == 64) {
Tim Northover3d38b3a2016-10-11 20:50:21 +00002122 // FIXME: Can we avoid manually doing this?
2123 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002124 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2125 << " operand\n");
Tim Northover3d38b3a2016-10-11 20:50:21 +00002126 return false;
2127 }
2128
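      // For a 32-to-64-bit extend this pairs SUBREG_TO_REG with a bitfield
      // move, e.g. a sext from s32 ends up as SBFMXri dst, src, #0, #31
      // (the "sxtw" alias); the zext case uses UBFMXri the same way.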
Amara Emersonc07fe302019-07-26 00:01:09 +00002129 auto SubregToReg =
2130 MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
2131 .addImm(0)
2132 .addUse(SrcReg)
2133 .addImm(AArch64::sub_32);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002134
Amara Emersonc07fe302019-07-26 00:01:09 +00002135 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2136 {DefReg}, {SubregToReg})
2137 .addImm(0)
2138 .addImm(SrcSize - 1);
2139 } else if (DstSize <= 32) {
2140 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2141 {DefReg}, {SrcReg})
2142 .addImm(0)
2143 .addImm(SrcSize - 1);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002144 } else {
2145 return false;
2146 }
2147
2148 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
Tim Northover3d38b3a2016-10-11 20:50:21 +00002149 I.eraseFromParent();
2150 return true;
2151 }
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00002152
Tim Northover69271c62016-10-12 22:49:11 +00002153 case TargetOpcode::G_SITOFP:
2154 case TargetOpcode::G_UITOFP:
2155 case TargetOpcode::G_FPTOSI:
2156 case TargetOpcode::G_FPTOUI: {
2157 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2158 SrcTy = MRI.getType(I.getOperand(1).getReg());
2159 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2160 if (NewOpc == Opcode)
2161 return false;
2162
2163 I.setDesc(TII.get(NewOpc));
2164 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2165
2166 return true;
2167 }
2168
2169
Tim Northoverc1d8c2b2016-10-11 22:29:23 +00002170 case TargetOpcode::G_INTTOPTR:
Daniel Sandersedd07842017-08-17 09:26:14 +00002171 // The importer is currently unable to import pointer types since they
2172 // didn't exist in SelectionDAG.
Daniel Sanderseb2f5f32017-08-15 15:10:31 +00002173 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sanders16e6dd32017-08-15 13:50:09 +00002174
Daniel Sandersedd07842017-08-17 09:26:14 +00002175 case TargetOpcode::G_BITCAST:
2176 // Imported SelectionDAG rules can handle every bitcast except those that
2177 // bitcast from a type to the same type. Ideally, these shouldn't occur
Amara Emersonb9560512019-04-11 20:32:24 +00002178 // but we might not run an optimizer that deletes them. The other exception
2179 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2180 // of them.
2181 return selectCopy(I, TII, MRI, TRI, RBI);
Daniel Sandersedd07842017-08-17 09:26:14 +00002182
Tim Northover9ac0eba2016-11-08 00:45:29 +00002183 case TargetOpcode::G_SELECT: {
2184 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002185 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2186 << ", expected: " << LLT::scalar(1) << '\n');
Tim Northover9ac0eba2016-11-08 00:45:29 +00002187 return false;
2188 }
2189
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002190 const Register CondReg = I.getOperand(1).getReg();
2191 const Register TReg = I.getOperand(2).getReg();
2192 const Register FReg = I.getOperand(3).getReg();
Tim Northover9ac0eba2016-11-08 00:45:29 +00002193
Jessica Paquette99316042019-07-02 19:44:16 +00002194 if (tryOptSelect(I))
Amara Emersonc37ff0d2019-06-05 23:46:16 +00002195 return true;
Tim Northover9ac0eba2016-11-08 00:45:29 +00002196
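    // Fallback: test bit 0 of the condition ("tst w<cond>, #1" via ANDSWri)
    // and then select on NE, i.e. roughly "csel dst, tval, fval, ne".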
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002197 Register CSelOpc = selectSelectOpc(I, MRI, RBI);
Tim Northover9ac0eba2016-11-08 00:45:29 +00002198 MachineInstr &TstMI =
2199 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2200 .addDef(AArch64::WZR)
2201 .addUse(CondReg)
2202 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2203
2204 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2205 .addDef(I.getOperand(0).getReg())
2206 .addUse(TReg)
2207 .addUse(FReg)
2208 .addImm(AArch64CC::NE);
2209
2210 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2211 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2212
2213 I.eraseFromParent();
2214 return true;
2215 }
Tim Northover6c02ad52016-10-12 22:49:04 +00002216 case TargetOpcode::G_ICMP: {
Amara Emerson9bf092d2019-04-09 21:22:43 +00002217 if (Ty.isVector())
2218 return selectVectorICmp(I, MRI);
2219
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00002220 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002221 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2222 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover6c02ad52016-10-12 22:49:04 +00002223 return false;
2224 }
2225
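    // Lower this as a compare followed by a conditional set: roughly a
    // subs/cmp of the two operands and then "cset dst, <cc>" for the
    // predicate's condition code.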
Jessica Paquette49537bb2019-06-17 18:40:06 +00002226 MachineIRBuilder MIRBuilder(I);
Jessica Paquette99316042019-07-02 19:44:16 +00002227 if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
2228 MIRBuilder))
2229 return false;
Jessica Paquette49537bb2019-06-17 18:40:06 +00002230 emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
Jessica Paquette99316042019-07-02 19:44:16 +00002231 MIRBuilder);
Tim Northover6c02ad52016-10-12 22:49:04 +00002232 I.eraseFromParent();
2233 return true;
2234 }
2235
Tim Northover7dd378d2016-10-12 22:49:07 +00002236 case TargetOpcode::G_FCMP: {
Aditya Nandakumar02c602e2017-07-31 17:00:16 +00002237 if (Ty != LLT::scalar(32)) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +00002238 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2239 << ", expected: " << LLT::scalar(32) << '\n');
Tim Northover7dd378d2016-10-12 22:49:07 +00002240 return false;
2241 }
2242
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002243 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2244 if (!CmpOpc)
Tim Northover7dd378d2016-10-12 22:49:07 +00002245 return false;
Tim Northover7dd378d2016-10-12 22:49:07 +00002246
2247 // FIXME: regbank
2248
2249 AArch64CC::CondCode CC1, CC2;
2250 changeFCMPPredToAArch64CC(
2251 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
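    // Some FP predicates need two AArch64 conditions (e.g. ONE, which holds
    // when the operands compare ordered and unequal); in that case CC2 != AL
    // and the two CSINC results below are ORed together.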
2252
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002253 // Partially build the compare. Decide if we need to add a use for the
2254 // third operand based off whether or not we're comparing against 0.0.
2255 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2256 .addUse(I.getOperand(2).getReg());
2257
2258 // If we don't have an immediate compare, then we need to add a use of the
2259 // register which wasn't used for the immediate.
2260 // Note that the immediate will always be the last operand.
2261 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2262 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
Tim Northover7dd378d2016-10-12 22:49:07 +00002263
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002264 const Register DefReg = I.getOperand(0).getReg();
2265 Register Def1Reg = DefReg;
Tim Northover7dd378d2016-10-12 22:49:07 +00002266 if (CC2 != AArch64CC::AL)
2267 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2268
2269 MachineInstr &CSetMI =
2270 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2271 .addDef(Def1Reg)
2272 .addUse(AArch64::WZR)
2273 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00002274 .addImm(getInvertedCondCode(CC1));
Tim Northover7dd378d2016-10-12 22:49:07 +00002275
2276 if (CC2 != AArch64CC::AL) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002277 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
Tim Northover7dd378d2016-10-12 22:49:07 +00002278 MachineInstr &CSet2MI =
2279 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2280 .addDef(Def2Reg)
2281 .addUse(AArch64::WZR)
2282 .addUse(AArch64::WZR)
Tim Northover33a1a0b2017-01-17 23:04:01 +00002283 .addImm(getInvertedCondCode(CC2));
Tim Northover7dd378d2016-10-12 22:49:07 +00002284 MachineInstr &OrMI =
2285 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2286 .addDef(DefReg)
2287 .addUse(Def1Reg)
2288 .addUse(Def2Reg);
2289 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2290 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2291 }
Jessica Paquetteb73ea75b2019-05-28 22:52:49 +00002292 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
Tim Northover7dd378d2016-10-12 22:49:07 +00002293 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2294
2295 I.eraseFromParent();
2296 return true;
2297 }
Tim Northovere9600d82017-02-08 17:57:27 +00002298 case TargetOpcode::G_VASTART:
2299 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2300 : selectVaStartAAPCS(I, MF, MRI);
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00002301 case TargetOpcode::G_INTRINSIC:
2302 return selectIntrinsic(I, MRI);
Amara Emerson1f5d9942018-04-25 14:43:59 +00002303 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
Jessica Paquette22c62152019-04-02 19:57:26 +00002304 return selectIntrinsicWithSideEffects(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002305 case TargetOpcode::G_IMPLICIT_DEF: {
Justin Bogner4fc69662017-07-12 17:32:32 +00002306 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
Amara Emerson58aea522018-02-02 01:44:43 +00002307 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002308 const Register DstReg = I.getOperand(0).getReg();
Amara Emerson58aea522018-02-02 01:44:43 +00002309 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2310 const TargetRegisterClass *DstRC =
2311 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2312 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
Justin Bogner4fc69662017-07-12 17:32:32 +00002313 return true;
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002314 }
Amara Emerson1e8c1642018-07-31 00:09:02 +00002315 case TargetOpcode::G_BLOCK_ADDR: {
2316 if (TM.getCodeModel() == CodeModel::Large) {
2317 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2318 I.eraseFromParent();
2319 return true;
2320 } else {
2321 I.setDesc(TII.get(AArch64::MOVaddrBA));
2322 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2323 I.getOperand(0).getReg())
2324 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2325 /* Offset */ 0, AArch64II::MO_PAGE)
2326 .addBlockAddress(
2327 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2328 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2329 I.eraseFromParent();
2330 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2331 }
2332 }
Jessica Paquette991cb392019-04-23 20:46:19 +00002333 case TargetOpcode::G_INTRINSIC_TRUNC:
2334 return selectIntrinsicTrunc(I, MRI);
Jessica Paquette4fe75742019-04-23 23:03:03 +00002335 case TargetOpcode::G_INTRINSIC_ROUND:
2336 return selectIntrinsicRound(I, MRI);
Amara Emerson5ec14602018-12-10 18:44:58 +00002337 case TargetOpcode::G_BUILD_VECTOR:
2338 return selectBuildVector(I, MRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002339 case TargetOpcode::G_MERGE_VALUES:
2340 return selectMergeValues(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002341 case TargetOpcode::G_UNMERGE_VALUES:
2342 return selectUnmergeValues(I, MRI);
Amara Emerson1abe05c2019-02-21 20:20:16 +00002343 case TargetOpcode::G_SHUFFLE_VECTOR:
2344 return selectShuffleVector(I, MRI);
Jessica Paquette607774c2019-03-11 22:18:01 +00002345 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2346 return selectExtractElt(I, MRI);
Jessica Paquette5aff1f42019-03-14 18:01:30 +00002347 case TargetOpcode::G_INSERT_VECTOR_ELT:
2348 return selectInsertElt(I, MRI);
Amara Emerson2ff22982019-03-14 22:48:15 +00002349 case TargetOpcode::G_CONCAT_VECTORS:
2350 return selectConcatVectors(I, MRI);
Amara Emerson6e71b342019-06-21 18:10:41 +00002351 case TargetOpcode::G_JUMP_TABLE:
2352 return selectJumpTable(I, MRI);
Amara Emerson1e8c1642018-07-31 00:09:02 +00002353 }
Ahmed Bougacha6756a2c2016-07-27 14:31:55 +00002354
2355 return false;
2356}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00002357
Amara Emerson6e71b342019-06-21 18:10:41 +00002358bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2359 MachineRegisterInfo &MRI) const {
2360 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002361 Register JTAddr = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002362 unsigned JTI = I.getOperand(1).getIndex();
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002363 Register Index = I.getOperand(2).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002364 MachineIRBuilder MIB(I);
2365
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002366 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2367 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
Amara Emerson6e71b342019-06-21 18:10:41 +00002368 MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
2369 {JTAddr, Index})
2370 .addJumpTableIndex(JTI);
2371
2372 // Build the indirect branch.
2373 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2374 I.eraseFromParent();
2375 return true;
2376}
2377
2378bool AArch64InstructionSelector::selectJumpTable(
2379 MachineInstr &I, MachineRegisterInfo &MRI) const {
2380 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2381 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2382
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002383 Register DstReg = I.getOperand(0).getReg();
Amara Emerson6e71b342019-06-21 18:10:41 +00002384 unsigned JTI = I.getOperand(1).getIndex();
2385 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
2386 MachineIRBuilder MIB(I);
2387 auto MovMI =
2388 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2389 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2390 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2391 I.eraseFromParent();
2392 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2393}
2394
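// Select a TLS G_GLOBAL_VALUE on MachO: load the thread-local descriptor
// into X0 via the GOT (LOADgot with MO_TLS), load the accessor function
// pointer from it, and call it with the TLS call preserved register mask.
// The address of the variable comes back in X0.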
Tim Northover01eb8692019-08-09 09:32:38 +00002395bool AArch64InstructionSelector::selectTLSGlobalValue(
2396 MachineInstr &I, MachineRegisterInfo &MRI) const {
2397 if (!STI.isTargetMachO())
2398 return false;
2399 MachineFunction &MF = *I.getParent()->getParent();
2400 MF.getFrameInfo().setAdjustsStack(true);
2401
2402 const GlobalValue &GV = *I.getOperand(1).getGlobal();
2403 MachineIRBuilder MIB(I);
2404
2405 MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2406 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2407
Amara Emerson72c81b92019-08-13 06:55:32 +00002408 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2409 {Register(AArch64::X0)})
2410 .addImm(0);
Tim Northover01eb8692019-08-09 09:32:38 +00002411
2412 // TLS calls preserve all registers except those that absolutely must be
2413 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2414 // silly).
Amara Emerson72c81b92019-08-13 06:55:32 +00002415 MIB.buildInstr(AArch64::BLR, {}, {Load})
Tim Northover01eb8692019-08-09 09:32:38 +00002416 .addDef(AArch64::X0, RegState::Implicit)
2417 .addRegMask(TRI.getTLSCallPreservedMask());
2418
2419 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2420 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2421 MRI);
2422 I.eraseFromParent();
2423 return true;
2424}
2425
Jessica Paquette991cb392019-04-23 20:46:19 +00002426bool AArch64InstructionSelector::selectIntrinsicTrunc(
2427 MachineInstr &I, MachineRegisterInfo &MRI) const {
2428 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2429
2430 // Select the correct opcode.
2431 unsigned Opc = 0;
2432 if (!SrcTy.isVector()) {
2433 switch (SrcTy.getSizeInBits()) {
2434    default:
      // Unsupported scalar size; leave Opc == 0 so we bail out below.
      break;
2435 case 16:
2436 Opc = AArch64::FRINTZHr;
2437 break;
2438 case 32:
2439 Opc = AArch64::FRINTZSr;
2440 break;
2441 case 64:
2442 Opc = AArch64::FRINTZDr;
2443 break;
2444 }
2445 } else {
2446 unsigned NumElts = SrcTy.getNumElements();
2447 switch (SrcTy.getElementType().getSizeInBits()) {
2448 default:
2449 break;
2450 case 16:
2451 if (NumElts == 4)
2452 Opc = AArch64::FRINTZv4f16;
2453 else if (NumElts == 8)
2454 Opc = AArch64::FRINTZv8f16;
2455 break;
2456 case 32:
2457 if (NumElts == 2)
2458 Opc = AArch64::FRINTZv2f32;
2459 else if (NumElts == 4)
2460 Opc = AArch64::FRINTZv4f32;
2461 break;
2462 case 64:
2463 if (NumElts == 2)
2464 Opc = AArch64::FRINTZv2f64;
2465 break;
2466 }
2467 }
2468
2469 if (!Opc) {
2470 // Didn't get an opcode above, bail.
2471 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
2472 return false;
2473 }
2474
2475 // Legalization would have set us up perfectly for this; we just need to
2476 // set the opcode and move on.
2477 I.setDesc(TII.get(Opc));
2478 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2479}
2480
Jessica Paquette4fe75742019-04-23 23:03:03 +00002481bool AArch64InstructionSelector::selectIntrinsicRound(
2482 MachineInstr &I, MachineRegisterInfo &MRI) const {
2483 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2484
2485 // Select the correct opcode.
2486 unsigned Opc = 0;
2487 if (!SrcTy.isVector()) {
2488 switch (SrcTy.getSizeInBits()) {
2489    default:
      // Unsupported scalar size; leave Opc == 0 so we bail out below.
      break;
2490 case 16:
2491 Opc = AArch64::FRINTAHr;
2492 break;
2493 case 32:
2494 Opc = AArch64::FRINTASr;
2495 break;
2496 case 64:
2497 Opc = AArch64::FRINTADr;
2498 break;
2499 }
2500 } else {
2501 unsigned NumElts = SrcTy.getNumElements();
2502 switch (SrcTy.getElementType().getSizeInBits()) {
2503 default:
2504 break;
2505 case 16:
2506 if (NumElts == 4)
2507 Opc = AArch64::FRINTAv4f16;
2508 else if (NumElts == 8)
2509 Opc = AArch64::FRINTAv8f16;
2510 break;
2511 case 32:
2512 if (NumElts == 2)
2513 Opc = AArch64::FRINTAv2f32;
2514 else if (NumElts == 4)
2515 Opc = AArch64::FRINTAv4f32;
2516 break;
2517 case 64:
2518 if (NumElts == 2)
2519 Opc = AArch64::FRINTAv2f64;
2520 break;
2521 }
2522 }
2523
2524 if (!Opc) {
2525 // Didn't get an opcode above, bail.
2526 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
2527 return false;
2528 }
2529
2530 // Legalization would have set us up perfectly for this; we just need to
2531 // set the opcode and move on.
2532 I.setDesc(TII.get(Opc));
2533 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2534}
2535
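// Select a vector G_ICMP. Predicates are mapped onto the NEON compare
// instructions (CMEQ/CMHI/CMHS/CMGT/CMGE); the "less-than" style predicates
// are handled by commuting the operands, and "ne" is implemented as "eq"
// followed by a NOT of the result.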
Amara Emerson9bf092d2019-04-09 21:22:43 +00002536bool AArch64InstructionSelector::selectVectorICmp(
2537 MachineInstr &I, MachineRegisterInfo &MRI) const {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002538 Register DstReg = I.getOperand(0).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002539 LLT DstTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002540 Register SrcReg = I.getOperand(2).getReg();
2541 Register Src2Reg = I.getOperand(3).getReg();
Amara Emerson9bf092d2019-04-09 21:22:43 +00002542 LLT SrcTy = MRI.getType(SrcReg);
2543
2544 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
2545 unsigned NumElts = DstTy.getNumElements();
2546
2547 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
2548 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
2549 // Third index is cc opcode:
2550 // 0 == eq
2551 // 1 == ugt
2552 // 2 == uge
2553 // 3 == ult
2554 // 4 == ule
2555 // 5 == sgt
2556 // 6 == sge
2557 // 7 == slt
2558 // 8 == sle
2559 // ne is done by negating 'eq' result.
2560
2561  // The table below assumes that for some comparisons the operands will be
2562 // commuted.
2563 // ult op == commute + ugt op
2564 // ule op == commute + uge op
2565 // slt op == commute + sgt op
2566 // sle op == commute + sge op
2567 unsigned PredIdx = 0;
2568 bool SwapOperands = false;
2569 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
2570 switch (Pred) {
2571 case CmpInst::ICMP_NE:
2572 case CmpInst::ICMP_EQ:
2573 PredIdx = 0;
2574 break;
2575 case CmpInst::ICMP_UGT:
2576 PredIdx = 1;
2577 break;
2578 case CmpInst::ICMP_UGE:
2579 PredIdx = 2;
2580 break;
2581 case CmpInst::ICMP_ULT:
2582 PredIdx = 3;
2583 SwapOperands = true;
2584 break;
2585 case CmpInst::ICMP_ULE:
2586 PredIdx = 4;
2587 SwapOperands = true;
2588 break;
2589 case CmpInst::ICMP_SGT:
2590 PredIdx = 5;
2591 break;
2592 case CmpInst::ICMP_SGE:
2593 PredIdx = 6;
2594 break;
2595 case CmpInst::ICMP_SLT:
2596 PredIdx = 7;
2597 SwapOperands = true;
2598 break;
2599 case CmpInst::ICMP_SLE:
2600 PredIdx = 8;
2601 SwapOperands = true;
2602 break;
2603 default:
2604 llvm_unreachable("Unhandled icmp predicate");
2605 return false;
2606 }
2607
2608 // This table obviously should be tablegen'd when we have our GISel native
2609 // tablegen selector.
2610
2611 static const unsigned OpcTable[4][4][9] = {
2612 {
2613 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2614 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2615 0 /* invalid */},
2616 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2617 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2618 0 /* invalid */},
2619 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
2620 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
2621 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
2622 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
2623 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
2624 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
2625 },
2626 {
2627 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2628 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2629 0 /* invalid */},
2630 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
2631 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
2632 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
2633 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
2634 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
2635 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
2636 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2637 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2638 0 /* invalid */}
2639 },
2640 {
2641 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
2642 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
2643 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
2644 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
2645 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
2646 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
2647 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2648 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2649 0 /* invalid */},
2650 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2651 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2652 0 /* invalid */}
2653 },
2654 {
2655 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
2656 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
2657 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
2658 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2659 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2660 0 /* invalid */},
2661 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2662 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2663 0 /* invalid */},
2664 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2665 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
2666 0 /* invalid */}
2667 },
2668 };
2669 unsigned EltIdx = Log2_32(SrcEltSize / 8);
2670 unsigned NumEltsIdx = Log2_32(NumElts / 2);
2671 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
2672 if (!Opc) {
2673 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
2674 return false;
2675 }
2676
2677 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2678 const TargetRegisterClass *SrcRC =
2679 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
2680 if (!SrcRC) {
2681 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2682 return false;
2683 }
2684
2685 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
2686 if (SrcTy.getSizeInBits() == 128)
2687 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
2688
2689 if (SwapOperands)
2690 std::swap(SrcReg, Src2Reg);
2691
2692 MachineIRBuilder MIB(I);
2693 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
2694 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2695
2696 // Invert if we had a 'ne' cc.
2697 if (NotOpc) {
2698 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
2699 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
2700 } else {
2701 MIB.buildCopy(DstReg, Cmp.getReg(0));
2702 }
2703 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
2704 I.eraseFromParent();
2705 return true;
2706}
2707
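// Widen a scalar (or narrower vector) value into DstRC by emitting an
// IMPLICIT_DEF of the wide class and an INSERT_SUBREG of the value into the
// low hsub/ssub/dsub subregister. Returns nullptr for unsupported sizes.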
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002708MachineInstr *AArch64InstructionSelector::emitScalarToVector(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002709 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002710 MachineIRBuilder &MIRBuilder) const {
2711 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
Amara Emerson5ec14602018-12-10 18:44:58 +00002712
2713 auto BuildFn = [&](unsigned SubregIndex) {
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002714 auto Ins =
2715 MIRBuilder
2716 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
2717 .addImm(SubregIndex);
2718 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
2719 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
2720 return &*Ins;
Amara Emerson5ec14602018-12-10 18:44:58 +00002721 };
2722
Amara Emerson8acb0d92019-03-04 19:16:00 +00002723 switch (EltSize) {
Jessica Paquette245047d2019-01-24 22:00:41 +00002724 case 16:
2725 return BuildFn(AArch64::hsub);
Amara Emerson5ec14602018-12-10 18:44:58 +00002726 case 32:
2727 return BuildFn(AArch64::ssub);
2728 case 64:
2729 return BuildFn(AArch64::dsub);
2730 default:
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00002731 return nullptr;
Amara Emerson5ec14602018-12-10 18:44:58 +00002732 }
2733}
2734
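// Select G_MERGE_VALUES. Two cases are handled here: merging two s64s into
// an s128 via lane inserts, and merging two 32-bit GPRs into an s64 via
// SUBREG_TO_REG anyexts and a BFM bitfield insert.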
Amara Emerson8cb186c2018-12-20 01:11:04 +00002735bool AArch64InstructionSelector::selectMergeValues(
2736 MachineInstr &I, MachineRegisterInfo &MRI) const {
2737 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
2738 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2739 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2740 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
Amara Emerson511f7f52019-07-23 22:05:13 +00002741 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002742
Amara Emerson8cb186c2018-12-20 01:11:04 +00002743 if (I.getNumOperands() != 3)
2744 return false;
Amara Emerson511f7f52019-07-23 22:05:13 +00002745
2746 // Merging 2 s64s into an s128.
2747 if (DstTy == LLT::scalar(128)) {
2748 if (SrcTy.getSizeInBits() != 64)
2749 return false;
2750 MachineIRBuilder MIB(I);
2751 Register DstReg = I.getOperand(0).getReg();
2752 Register Src1Reg = I.getOperand(1).getReg();
2753 Register Src2Reg = I.getOperand(2).getReg();
2754 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
2755 MachineInstr *InsMI =
2756 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
2757 if (!InsMI)
2758 return false;
2759 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
2760 Src2Reg, /* LaneIdx */ 1, RB, MIB);
2761 if (!Ins2MI)
2762 return false;
2763 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
2764 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
2765 I.eraseFromParent();
2766 return true;
2767 }
2768
Amara Emerson8cb186c2018-12-20 01:11:04 +00002769 if (RB.getID() != AArch64::GPRRegBankID)
2770 return false;
2771
Amara Emerson511f7f52019-07-23 22:05:13 +00002772 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
2773 return false;
2774
Amara Emerson8cb186c2018-12-20 01:11:04 +00002775 auto *DstRC = &AArch64::GPR64RegClass;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002776 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002777 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2778 TII.get(TargetOpcode::SUBREG_TO_REG))
2779 .addDef(SubToRegDef)
2780 .addImm(0)
2781 .addUse(I.getOperand(1).getReg())
2782 .addImm(AArch64::sub_32);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002783 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002784 // Need to anyext the second scalar before we can use bfm
2785 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
2786 TII.get(TargetOpcode::SUBREG_TO_REG))
2787 .addDef(SubToRegDef2)
2788 .addImm(0)
2789 .addUse(I.getOperand(2).getReg())
2790 .addImm(AArch64::sub_32);
Amara Emerson8cb186c2018-12-20 01:11:04 +00002791 MachineInstr &BFM =
2792 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
Amara Emerson321bfb22018-12-20 03:27:42 +00002793 .addDef(I.getOperand(0).getReg())
Amara Emerson8cb186c2018-12-20 01:11:04 +00002794 .addUse(SubToRegDef)
2795 .addUse(SubToRegDef2)
2796 .addImm(32)
2797 .addImm(31);
2798 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
2799 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
2800 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
2801 I.eraseFromParent();
2802 return true;
2803}
2804
Jessica Paquette607774c2019-03-11 22:18:01 +00002805static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
2806 const unsigned EltSize) {
2807 // Choose a lane copy opcode and subregister based off of the size of the
2808 // vector's elements.
2809 switch (EltSize) {
2810 case 16:
2811 CopyOpc = AArch64::CPYi16;
2812 ExtractSubReg = AArch64::hsub;
2813 break;
2814 case 32:
2815 CopyOpc = AArch64::CPYi32;
2816 ExtractSubReg = AArch64::ssub;
2817 break;
2818 case 64:
2819 CopyOpc = AArch64::CPYi64;
2820 ExtractSubReg = AArch64::dsub;
2821 break;
2822 default:
2823 // Unknown size, bail out.
2824 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
2825 return false;
2826 }
2827 return true;
2828}
2829
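// Emit an extract of ScalarTy from lane LaneIdx of VecReg. Lane 0 is a plain
// subregister copy; other lanes use a CPYi* lane copy, widening sub-128-bit
// source vectors to an FPR128 first since the lane-copy instructions only
// accept 128-bit registers.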
Amara Emersond61b89b2019-03-14 22:48:18 +00002830MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002831 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
2832 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
Amara Emersond61b89b2019-03-14 22:48:18 +00002833 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2834 unsigned CopyOpc = 0;
2835 unsigned ExtractSubReg = 0;
2836 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
2837 LLVM_DEBUG(
2838 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
2839 return nullptr;
2840 }
2841
2842 const TargetRegisterClass *DstRC =
2843 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
2844 if (!DstRC) {
2845 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
2846 return nullptr;
2847 }
2848
2849 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
2850 const LLT &VecTy = MRI.getType(VecReg);
2851 const TargetRegisterClass *VecRC =
2852 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
2853 if (!VecRC) {
2854 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
2855 return nullptr;
2856 }
2857
2858 // The register that we're going to copy into.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002859 Register InsertReg = VecReg;
Amara Emersond61b89b2019-03-14 22:48:18 +00002860 if (!DstReg)
2861 DstReg = MRI.createVirtualRegister(DstRC);
2862 // If the lane index is 0, we just use a subregister COPY.
2863 if (LaneIdx == 0) {
Amara Emerson86271782019-03-18 19:20:10 +00002864 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
2865 .addReg(VecReg, 0, ExtractSubReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002866 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
Amara Emerson3739a202019-03-15 21:59:50 +00002867 return &*Copy;
Amara Emersond61b89b2019-03-14 22:48:18 +00002868 }
2869
2870 // Lane copies require 128-bit wide registers. If we're dealing with an
2871 // unpacked vector, then we need to move up to that width. Insert an implicit
2872 // def and a subregister insert to get us there.
2873 if (VecTy.getSizeInBits() != 128) {
2874 MachineInstr *ScalarToVector = emitScalarToVector(
2875 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
2876 if (!ScalarToVector)
2877 return nullptr;
2878 InsertReg = ScalarToVector->getOperand(0).getReg();
2879 }
2880
2881 MachineInstr *LaneCopyMI =
2882 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
2883 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
2884
2885 // Make sure that we actually constrain the initial copy.
2886 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
2887 return LaneCopyMI;
2888}
2889
Jessica Paquette607774c2019-03-11 22:18:01 +00002890bool AArch64InstructionSelector::selectExtractElt(
2891 MachineInstr &I, MachineRegisterInfo &MRI) const {
2892 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
2893 "unexpected opcode!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002894 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002895 const LLT NarrowTy = MRI.getType(DstReg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002896 const Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette607774c2019-03-11 22:18:01 +00002897 const LLT WideTy = MRI.getType(SrcReg);
Amara Emersond61b89b2019-03-14 22:48:18 +00002898 (void)WideTy;
Jessica Paquette607774c2019-03-11 22:18:01 +00002899 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
2900 "source register size too small!");
2901 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
2902
2903 // Need the lane index to determine the correct copy opcode.
2904 MachineOperand &LaneIdxOp = I.getOperand(2);
2905 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
2906
2907 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
2908 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
2909 return false;
2910 }
2911
Jessica Paquettebb1aced2019-03-13 21:19:29 +00002912 // Find the index to extract from.
Jessica Paquette76f64b62019-04-26 21:53:13 +00002913 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
2914 if (!VRegAndVal)
Jessica Paquette607774c2019-03-11 22:18:01 +00002915 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00002916 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette607774c2019-03-11 22:18:01 +00002917
Jessica Paquette607774c2019-03-11 22:18:01 +00002918 MachineIRBuilder MIRBuilder(I);
2919
Amara Emersond61b89b2019-03-14 22:48:18 +00002920 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2921 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
2922 LaneIdx, MIRBuilder);
2923 if (!Extract)
2924 return false;
2925
2926 I.eraseFromParent();
2927 return true;
2928}
2929
2930bool AArch64InstructionSelector::selectSplitVectorUnmerge(
2931 MachineInstr &I, MachineRegisterInfo &MRI) const {
2932 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002933 Register SrcReg = I.getOperand(NumElts).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002934 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2935 const LLT SrcTy = MRI.getType(SrcReg);
2936
2937 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
2938 if (SrcTy.getSizeInBits() > 128) {
2939 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
2940 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002941 }
2942
Amara Emersond61b89b2019-03-14 22:48:18 +00002943 MachineIRBuilder MIB(I);
2944
2945 // We implement a split vector operation by treating the sub-vectors as
2946 // scalars and extracting them.
2947 const RegisterBank &DstRB =
2948 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
2949 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002950 Register Dst = I.getOperand(OpIdx).getReg();
Amara Emersond61b89b2019-03-14 22:48:18 +00002951 MachineInstr *Extract =
2952 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
2953 if (!Extract)
Jessica Paquette607774c2019-03-11 22:18:01 +00002954 return false;
Jessica Paquette607774c2019-03-11 22:18:01 +00002955 }
Jessica Paquette607774c2019-03-11 22:18:01 +00002956 I.eraseFromParent();
2957 return true;
2958}
2959
Jessica Paquette245047d2019-01-24 22:00:41 +00002960bool AArch64InstructionSelector::selectUnmergeValues(
2961 MachineInstr &I, MachineRegisterInfo &MRI) const {
2962 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2963 "unexpected opcode");
2964
2965 // TODO: Handle unmerging into GPRs and from scalars to scalars.
2966 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
2967 AArch64::FPRRegBankID ||
2968 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
2969 AArch64::FPRRegBankID) {
2970 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
2971 "currently unsupported.\n");
2972 return false;
2973 }
2974
2975 // The last operand is the vector source register, and every other operand is
2976 // a register to unpack into.
2977 unsigned NumElts = I.getNumOperands() - 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00002978 Register SrcReg = I.getOperand(NumElts).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00002979 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
2980 const LLT WideTy = MRI.getType(SrcReg);
Benjamin Kramer653020d2019-01-24 23:45:07 +00002981 (void)WideTy;
Serge Gueltona023d6b2019-08-19 14:40:33 +00002982 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
2983 "can only unmerge from vector or s128 types!");
Jessica Paquette245047d2019-01-24 22:00:41 +00002984 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
2985 "source register size too small!");
2986
Amara Emersond61b89b2019-03-14 22:48:18 +00002987 if (!NarrowTy.isScalar())
2988 return selectSplitVectorUnmerge(I, MRI);
Jessica Paquette245047d2019-01-24 22:00:41 +00002989
Amara Emerson3739a202019-03-15 21:59:50 +00002990 MachineIRBuilder MIB(I);
2991
Jessica Paquette245047d2019-01-24 22:00:41 +00002992 // Choose a lane copy opcode and subregister based off of the size of the
2993 // vector's elements.
2994 unsigned CopyOpc = 0;
2995 unsigned ExtractSubReg = 0;
Jessica Paquette607774c2019-03-11 22:18:01 +00002996 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
Jessica Paquette245047d2019-01-24 22:00:41 +00002997 return false;
Jessica Paquette245047d2019-01-24 22:00:41 +00002998
2999 // Set up for the lane copies.
3000 MachineBasicBlock &MBB = *I.getParent();
3001
3002 // Stores the registers we'll be copying from.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003003 SmallVector<Register, 4> InsertRegs;
Jessica Paquette245047d2019-01-24 22:00:41 +00003004
3005 // We'll use the first register twice, so we only need NumElts-1 registers.
3006 unsigned NumInsertRegs = NumElts - 1;
3007
3008 // If our elements fit into exactly 128 bits, then we can copy from the source
3009 // directly. Otherwise, we need to do a bit of setup with some subregister
3010 // inserts.
3011 if (NarrowTy.getSizeInBits() * NumElts == 128) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003012 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003013 } else {
3014 // No. We have to perform subregister inserts. For each insert, create an
3015    // They don't fit exactly, so we have to perform subregister inserts. For each insert, create an
3016 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003017 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00003018 MachineInstr &ImpDefMI =
3019 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3020 ImpDefReg);
3021
3022 // Now, create the subregister insert from SrcReg.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003023 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
Jessica Paquette245047d2019-01-24 22:00:41 +00003024 MachineInstr &InsMI =
3025 *BuildMI(MBB, I, I.getDebugLoc(),
3026 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3027 .addUse(ImpDefReg)
3028 .addUse(SrcReg)
3029 .addImm(AArch64::dsub);
3030
3031 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3032 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3033
3034 // Save the register so that we can copy from it after.
3035 InsertRegs.push_back(InsertReg);
3036 }
3037 }
3038
3039 // Now that we've created any necessary subregister inserts, we can
3040 // create the copies.
3041 //
3042 // Perform the first copy separately as a subregister copy.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003043 Register CopyTo = I.getOperand(0).getReg();
Amara Emerson86271782019-03-18 19:20:10 +00003044 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3045 .addReg(InsertRegs[0], 0, ExtractSubReg);
Amara Emerson3739a202019-03-15 21:59:50 +00003046 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
Jessica Paquette245047d2019-01-24 22:00:41 +00003047
3048 // Now, perform the remaining copies as vector lane copies.
3049 unsigned LaneIdx = 1;
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003050 for (Register InsReg : InsertRegs) {
3051 Register CopyTo = I.getOperand(LaneIdx).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003052 MachineInstr &CopyInst =
3053 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3054 .addUse(InsReg)
3055 .addImm(LaneIdx);
3056 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3057 ++LaneIdx;
3058 }
3059
3060 // Separately constrain the first copy's destination. Because of the
3061 // limitation in constrainOperandRegClass, we can't guarantee that this will
3062 // actually be constrained. So, do it ourselves using the second operand.
3063 const TargetRegisterClass *RC =
3064 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3065 if (!RC) {
3066 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3067 return false;
3068 }
3069
3070 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3071 I.eraseFromParent();
3072 return true;
3073}
3074
Amara Emerson2ff22982019-03-14 22:48:15 +00003075bool AArch64InstructionSelector::selectConcatVectors(
3076 MachineInstr &I, MachineRegisterInfo &MRI) const {
3077 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3078 "Unexpected opcode");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003079 Register Dst = I.getOperand(0).getReg();
3080 Register Op1 = I.getOperand(1).getReg();
3081 Register Op2 = I.getOperand(2).getReg();
Amara Emerson2ff22982019-03-14 22:48:15 +00003082 MachineIRBuilder MIRBuilder(I);
3083 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3084 if (!ConcatMI)
3085 return false;
3086 I.eraseFromParent();
3087 return true;
3088}
3089
Amara Emerson1abe05c2019-02-21 20:20:16 +00003090unsigned
3091AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
3092 MachineFunction &MF) const {
Hans Wennborg5d5ee4a2019-04-26 08:31:00 +00003093 Type *CPTy = CPVal->getType();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003094 unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
3095 if (Align == 0)
3096 Align = MF.getDataLayout().getTypeAllocSize(CPTy);
3097
3098 MachineConstantPool *MCP = MF.getConstantPool();
3099 return MCP->getConstantPoolIndex(CPVal, Align);
3100}
3101
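// Materialize a constant (currently 8- or 16-byte ones, e.g. the byte-index
// vectors used by TBL-based shuffles) by placing it in the constant pool and
// loading it with an ADRP + LDRDui/LDRQui pair.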
3102MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3103 Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3104 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3105
3106 auto Adrp =
3107 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3108 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003109
3110 MachineInstr *LoadMI = nullptr;
3111 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3112 case 16:
3113 LoadMI =
3114 &*MIRBuilder
3115 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3116 .addConstantPoolIndex(CPIdx, 0,
3117 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3118 break;
3119 case 8:
3120 LoadMI = &*MIRBuilder
3121 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3122 .addConstantPoolIndex(
3123 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3124 break;
3125 default:
3126 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3127 << *CPVal->getType());
3128 return nullptr;
3129 }
Amara Emerson1abe05c2019-02-21 20:20:16 +00003130 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003131 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3132 return LoadMI;
3133}
3134
3135/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3136/// size and RB.
3137static std::pair<unsigned, unsigned>
3138getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3139 unsigned Opc, SubregIdx;
3140 if (RB.getID() == AArch64::GPRRegBankID) {
3141 if (EltSize == 32) {
3142 Opc = AArch64::INSvi32gpr;
3143 SubregIdx = AArch64::ssub;
3144 } else if (EltSize == 64) {
3145 Opc = AArch64::INSvi64gpr;
3146 SubregIdx = AArch64::dsub;
3147 } else {
3148 llvm_unreachable("invalid elt size!");
3149 }
3150 } else {
3151 if (EltSize == 8) {
3152 Opc = AArch64::INSvi8lane;
3153 SubregIdx = AArch64::bsub;
3154 } else if (EltSize == 16) {
3155 Opc = AArch64::INSvi16lane;
3156 SubregIdx = AArch64::hsub;
3157 } else if (EltSize == 32) {
3158 Opc = AArch64::INSvi32lane;
3159 SubregIdx = AArch64::ssub;
3160 } else if (EltSize == 64) {
3161 Opc = AArch64::INSvi64lane;
3162 SubregIdx = AArch64::dsub;
3163 } else {
3164 llvm_unreachable("invalid elt size!");
3165 }
3166 }
3167 return std::make_pair(Opc, SubregIdx);
3168}
3169
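// Emit an ADD producing DefReg. The immediate form (ADDWri/ADDXri) is used
// when the RHS folds to a valid arithmetic immediate via selectArithImmed;
// otherwise the register-register form is emitted.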
Jessica Paquette99316042019-07-02 19:44:16 +00003170MachineInstr *
Jessica Paquette728b18f2019-07-24 23:11:01 +00003171AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3172 MachineOperand &RHS,
3173 MachineIRBuilder &MIRBuilder) const {
3174 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3175 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3176 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3177 {AArch64::ADDWrr, AArch64::ADDWri}};
3178 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3179 auto ImmFns = selectArithImmed(RHS);
3180 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3181 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()});
3182
3183 // If we matched a valid constant immediate, add those operands.
3184 if (ImmFns) {
3185 for (auto &RenderFn : *ImmFns)
3186 RenderFn(AddMI);
3187 } else {
3188 AddMI.addUse(RHS.getReg());
3189 }
3190
3191 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3192 return &*AddMI;
3193}
3194
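// Emit a CMN-style compare: an ADDS whose result goes to WZR/XZR, so only
// NZCV is updated. Used by tryFoldIntegerCompare to fold
//   %x = G_SUB 0, %y ; G_ICMP %z, %x
// into "cmn %z, %y".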
3195MachineInstr *
Jessica Paquette99316042019-07-02 19:44:16 +00003196AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3197 MachineIRBuilder &MIRBuilder) const {
3198 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3199 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3200 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3201 {AArch64::ADDSWrr, AArch64::ADDSWri}};
3202 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3203 auto ImmFns = selectArithImmed(RHS);
3204 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3205 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3206
3207 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
3208
3209 // If we matched a valid constant immediate, add those operands.
3210 if (ImmFns) {
3211 for (auto &RenderFn : *ImmFns)
3212 RenderFn(CmpMI);
3213 } else {
3214 CmpMI.addUse(RHS.getReg());
3215 }
3216
3217 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3218 return &*CmpMI;
3219}
3220
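// Emit a TST-style compare: an ANDS into WZR/XZR. The immediate form is only
// used when the RHS is a valid *logical* immediate, which is a stricter
// requirement than the arithmetic immediates accepted by ADDS/SUBS.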
Jessica Paquette55d19242019-07-08 22:58:36 +00003221MachineInstr *
3222AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3223 MachineIRBuilder &MIRBuilder) const {
3224 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3225 unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3226 bool Is32Bit = (RegSize == 32);
3227 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3228 {AArch64::ANDSWrr, AArch64::ANDSWri}};
3229 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3230
3231  // We might be able to fold an immediate into the TST. We need to make sure
3232 // it's a logical immediate though, since ANDS requires that.
3233 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3234 bool IsImmForm = ValAndVReg.hasValue() &&
3235 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3236 unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3237 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3238
3239 if (IsImmForm)
3240 TstMI.addImm(
3241 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3242 else
3243 TstMI.addUse(RHS);
3244
3245 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3246 return &*TstMI;
3247}
3248
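// Emit the flag-setting compare for a scalar/pointer G_ICMP. CMN/TST folds
// are tried first (see tryFoldIntegerCompare); otherwise this is a SUBS into
// WZR/XZR, using the immediate form when the RHS allows it.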
Jessica Paquette99316042019-07-02 19:44:16 +00003249MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
3250 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3251 MachineIRBuilder &MIRBuilder) const {
3252 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3253 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3254
Jessica Paquette55d19242019-07-08 22:58:36 +00003255 // Fold the compare if possible.
3256 MachineInstr *FoldCmp =
3257 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3258 if (FoldCmp)
3259 return FoldCmp;
Jessica Paquette99316042019-07-02 19:44:16 +00003260
3261 // Can't fold into a CMN. Just emit a normal compare.
3262 unsigned CmpOpc = 0;
3263 Register ZReg;
3264
3265 LLT CmpTy = MRI.getType(LHS.getReg());
Jessica Paquette65841092019-07-03 18:30:01 +00003266 assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3267 "Expected scalar or pointer");
Jessica Paquette99316042019-07-02 19:44:16 +00003268 if (CmpTy == LLT::scalar(32)) {
3269 CmpOpc = AArch64::SUBSWrr;
3270 ZReg = AArch64::WZR;
3271 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3272 CmpOpc = AArch64::SUBSXrr;
3273 ZReg = AArch64::XZR;
3274 } else {
3275 return nullptr;
3276 }
3277
3278 // Try to match immediate forms.
3279 auto ImmFns = selectArithImmed(RHS);
3280 if (ImmFns)
3281 CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
3282
3283 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
3284 // If we matched a valid constant immediate, add those operands.
3285 if (ImmFns) {
3286 for (auto &RenderFn : *ImmFns)
3287 RenderFn(CmpMI);
3288 } else {
3289 CmpMI.addUse(RHS.getReg());
3290 }
3291
3292 // Make sure that we can constrain the compare that we emitted.
3293 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3294 return &*CmpMI;
3295}
3296
Amara Emerson8acb0d92019-03-04 19:16:00 +00003297MachineInstr *AArch64InstructionSelector::emitVectorConcat(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003298 Optional<Register> Dst, Register Op1, Register Op2,
Amara Emerson2ff22982019-03-14 22:48:15 +00003299 MachineIRBuilder &MIRBuilder) const {
Amara Emerson8acb0d92019-03-04 19:16:00 +00003300 // We implement a vector concat by:
3301 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3302 // 2. Insert the upper vector into the destination's upper element
3303 // TODO: some of this code is common with G_BUILD_VECTOR handling.
3304 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3305
3306 const LLT Op1Ty = MRI.getType(Op1);
3307 const LLT Op2Ty = MRI.getType(Op2);
3308
3309 if (Op1Ty != Op2Ty) {
3310 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3311 return nullptr;
3312 }
3313 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3314
3315 if (Op1Ty.getSizeInBits() >= 128) {
3316 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3317 return nullptr;
3318 }
3319
3320 // At the moment we just support 64 bit vector concats.
3321 if (Op1Ty.getSizeInBits() != 64) {
3322    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3323 return nullptr;
3324 }
3325
3326 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3327 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3328 const TargetRegisterClass *DstRC =
3329 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3330
3331 MachineInstr *WidenedOp1 =
3332 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3333 MachineInstr *WidenedOp2 =
3334 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3335 if (!WidenedOp1 || !WidenedOp2) {
3336 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3337 return nullptr;
3338 }
3339
3340 // Now do the insert of the upper element.
3341 unsigned InsertOpc, InsSubRegIdx;
3342 std::tie(InsertOpc, InsSubRegIdx) =
3343 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3344
Amara Emerson2ff22982019-03-14 22:48:15 +00003345 if (!Dst)
3346 Dst = MRI.createVirtualRegister(DstRC);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003347 auto InsElt =
3348 MIRBuilder
Amara Emerson2ff22982019-03-14 22:48:15 +00003349 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
Amara Emerson8acb0d92019-03-04 19:16:00 +00003350 .addImm(1) /* Lane index */
3351 .addUse(WidenedOp2->getOperand(0).getReg())
3352 .addImm(0);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003353 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3354 return &*InsElt;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003355}
3356
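// Try to select a G_FCONSTANT as a single FMOVSi/FMOVDi. This only works for
// nonzero 32/64-bit values that are representable as an 8-bit FP immediate
// (getFP32Imm/getFP64Imm return -1 otherwise); callers fall back to other
// lowerings when nullptr is returned.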
Jessica Paquettea3843fe2019-05-01 22:39:43 +00003357MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3358 MachineInstr &I, MachineRegisterInfo &MRI) const {
3359 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3360 "Expected a G_FCONSTANT!");
3361 MachineOperand &ImmOp = I.getOperand(1);
3362 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3363
3364 // Only handle 32 and 64 bit defs for now.
3365 if (DefSize != 32 && DefSize != 64)
3366 return nullptr;
3367
3368 // Don't handle null values using FMOV.
3369 if (ImmOp.getFPImm()->isNullValue())
3370 return nullptr;
3371
3372 // Get the immediate representation for the FMOV.
3373 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3374 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3375 : AArch64_AM::getFP64Imm(ImmValAPF);
3376
3377 // If this is -1, it means the immediate can't be represented as the requested
3378 // floating point value. Bail.
3379 if (Imm == -1)
3380 return nullptr;
3381
3382 // Update MI to represent the new FMOV instruction, constrain it, and return.
3383 ImmOp.ChangeToImmediate(Imm);
3384 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3385 I.setDesc(TII.get(MovOpc));
3386 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3387 return &I;
3388}
3389
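// Materialize the boolean result of an integer compare into DefReg, i.e. the
// equivalent of "cset wD, <pred>", which is encoded as a CSINC of WZR/WZR
// with the inverted condition.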
Jessica Paquette49537bb2019-06-17 18:40:06 +00003390MachineInstr *
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003391AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
Jessica Paquette49537bb2019-06-17 18:40:06 +00003392 MachineIRBuilder &MIRBuilder) const {
3393 // CSINC increments the result when the predicate is false. Invert it.
3394 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3395 CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3396 auto I =
3397 MIRBuilder
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003398          .buildInstr(AArch64::CSINCWr, {DefReg},
                      {Register(AArch64::WZR), Register(AArch64::WZR)})
Jessica Paquette49537bb2019-06-17 18:40:06 +00003399 .addImm(InvCC);
3400 constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3401 return &*I;
3402}
3403
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003404bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3405 MachineIRBuilder MIB(I);
3406 MachineRegisterInfo &MRI = *MIB.getMRI();
3407 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3408
3409 // We want to recognize this pattern:
3410 //
3411 // $z = G_FCMP pred, $x, $y
3412 // ...
3413 // $w = G_SELECT $z, $a, $b
3414 //
3415 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3416 // some copies/truncs in between.)
3417 //
3418 // If we see this, then we can emit something like this:
3419 //
3420 // fcmp $x, $y
3421 // fcsel $w, $a, $b, pred
3422 //
3423 // Rather than emitting both of the rather long sequences in the standard
3424 // G_FCMP/G_SELECT select methods.
3425
3426 // First, check if the condition is defined by a compare.
3427 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3428 while (CondDef) {
3429 // We can only fold if all of the defs have one use.
3430 if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
3431 return false;
3432
3433 // We can skip over G_TRUNC since the condition is 1-bit.
3434 // Truncating/extending can have no impact on the value.
3435 unsigned Opc = CondDef->getOpcode();
3436 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3437 break;
3438
Amara Emersond940e202019-06-06 07:33:47 +00003439 // Can't see past copies from physregs.
3440 if (Opc == TargetOpcode::COPY &&
Daniel Sanders2bea69b2019-08-01 23:27:28 +00003441 Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
Amara Emersond940e202019-06-06 07:33:47 +00003442 return false;
3443
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003444 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3445 }
3446
3447 // Is the condition defined by a compare?
Jessica Paquette99316042019-07-02 19:44:16 +00003448 if (!CondDef)
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003449 return false;
3450
Jessica Paquette99316042019-07-02 19:44:16 +00003451 unsigned CondOpc = CondDef->getOpcode();
3452 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
3453 return false;
3454
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003455 AArch64CC::CondCode CondCode;
Jessica Paquette99316042019-07-02 19:44:16 +00003456 if (CondOpc == TargetOpcode::G_ICMP) {
3457 CondCode = changeICMPPredToAArch64CC(
3458 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
3459 if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
3460 CondDef->getOperand(1), MIB)) {
3461 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
3462 return false;
3463 }
3464 } else {
3465 // Get the condition code for the select.
3466 AArch64CC::CondCode CondCode2;
3467 changeFCMPPredToAArch64CC(
3468 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
3469 CondCode2);
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003470
Jessica Paquette99316042019-07-02 19:44:16 +00003471 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
3472 // instructions to emit the comparison.
3473 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
3474 // unnecessary.
3475 if (CondCode2 != AArch64CC::AL)
3476 return false;
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003477
Jessica Paquette99316042019-07-02 19:44:16 +00003478 // Make sure we'll be able to select the compare.
3479 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
3480 if (!CmpOpc)
3481 return false;
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003482
Jessica Paquette99316042019-07-02 19:44:16 +00003483 // Emit a new compare.
3484 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
3485 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
3486 Cmp.addUse(CondDef->getOperand(3).getReg());
3487 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3488 }
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003489
3490 // Emit the select.
3491 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
3492 auto CSel =
3493 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
3494 {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
3495 .addImm(CondCode);
Amara Emersonc37ff0d2019-06-05 23:46:16 +00003496 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
3497 I.eraseFromParent();
3498 return true;
3499}
3500
Jessica Paquette55d19242019-07-08 22:58:36 +00003501MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
3502 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3503 MachineIRBuilder &MIRBuilder) const {
Jessica Paquette99316042019-07-02 19:44:16 +00003504 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
3505 "Unexpected MachineOperand");
Jessica Paquette49537bb2019-06-17 18:40:06 +00003506 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3507 // We want to find this sort of thing:
3508 // x = G_SUB 0, y
3509 // G_ICMP z, x
3510 //
3511 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
3512 // e.g:
3513 //
3514 // cmn z, y
3515
Jessica Paquette49537bb2019-06-17 18:40:06 +00003516 // Helper lambda to detect the subtract followed by the compare.
3517 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
3518 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
3519 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
3520 return false;
3521
3522 // Need to make sure NZCV is the same at the end of the transformation.
3523 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
3524 return false;
3525
3530 // Make sure that we're getting
3531 // x = G_SUB 0, y
3532 auto ValAndVReg =
3533 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
3534 if (!ValAndVReg || ValAndVReg->Value != 0)
3535 return false;
3536
3537 // This can safely be represented as a CMN.
3538 return true;
3539 };
3540
3541 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
Jessica Paquette31329682019-07-10 18:44:57 +00003542 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
3543 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
Jessica Paquette55d19242019-07-08 22:58:36 +00003544 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3545 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
Jessica Paquette99316042019-07-02 19:44:16 +00003546
Jessica Paquette55d19242019-07-08 22:58:36 +00003547 // Given this:
3548 //
3549 // x = G_SUB 0, y
3550 // G_ICMP x, z
3551 //
3552 // Produce this:
3553 //
3554 // cmn y, z
3555 if (IsCMN(LHSDef, CC))
3556 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
3557
3558 // Same idea here, but with the RHS of the compare instead:
3559 //
3560 // Given this:
3561 //
3562 // x = G_SUB 0, y
3563 // G_ICMP z, x
3564 //
3565 // Produce this:
3566 //
3567 // cmn z, y
3568 if (IsCMN(RHSDef, CC))
3569 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
3570
3571 // Given this:
3572 //
3573 // z = G_AND x, y
3574 // G_ICMP z, 0
3575 //
3576  // Produce this if the compare is not an unsigned comparison:
3577 //
3578 // tst x, y
3579 if (!isUnsignedICMPPred(P) && LHSDef &&
3580 LHSDef->getOpcode() == TargetOpcode::G_AND) {
3581 // Make sure that the RHS is 0.
3582 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
3583 if (!ValAndVReg || ValAndVReg->Value != 0)
3584 return nullptr;
3585
3586 return emitTST(LHSDef->getOperand(1).getReg(),
3587 LHSDef->getOperand(2).getReg(), MIRBuilder);
Jessica Paquette49537bb2019-06-17 18:40:06 +00003588 }
3589
Jessica Paquette99316042019-07-02 19:44:16 +00003590 return nullptr;
Jessica Paquette49537bb2019-06-17 18:40:06 +00003591}
3592
Amara Emerson761ca2e2019-03-19 21:43:05 +00003593bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
3594 // Try to match a vector splat operation into a dup instruction.
3595 // We're looking for this pattern:
3596 // %scalar:gpr(s64) = COPY $x0
3597 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
3598 // %cst0:gpr(s32) = G_CONSTANT i32 0
3599 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
3600 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
3601 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
3602 // %zerovec(<2 x s32>)
3603 //
3604 // ...into:
3605 // %splat = DUP %scalar
3606 // We use the regbank of the scalar to determine which kind of dup to use.
3607 MachineIRBuilder MIB(I);
3608 MachineRegisterInfo &MRI = *MIB.getMRI();
3609 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3610 using namespace TargetOpcode;
3611 using namespace MIPatternMatch;
3612
3613 // Begin matching the insert.
3614 auto *InsMI =
Jessica Paquette7c959252019-07-10 18:46:56 +00003615 getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
Amara Emerson761ca2e2019-03-19 21:43:05 +00003616 if (!InsMI)
3617 return false;
3618 // Match the undef vector operand.
3619 auto *UndefMI =
Jessica Paquette7c959252019-07-10 18:46:56 +00003620 getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
Amara Emerson761ca2e2019-03-19 21:43:05 +00003621 if (!UndefMI)
3622 return false;
3623 // Match the scalar being splatted.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003624 Register ScalarReg = InsMI->getOperand(2).getReg();
Amara Emerson761ca2e2019-03-19 21:43:05 +00003625 const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
3626 // Match the index constant 0.
3627 int64_t Index = 0;
3628 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
3629 return false;
3630
3631 // The shuffle's second operand doesn't matter if the mask is all zero.
Matt Arsenault5af9cf02019-08-13 15:34:38 +00003632 const Constant *Mask = I.getOperand(3).getShuffleMask();
3633 if (!isa<ConstantAggregateZero>(Mask))
Amara Emerson761ca2e2019-03-19 21:43:05 +00003634 return false;
Amara Emerson761ca2e2019-03-19 21:43:05 +00003635
3636 // We're done, now find out what kind of splat we need.
3637 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3638 LLT EltTy = VecTy.getElementType();
3639 if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
3640 LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
3641 return false;
3642 }
3643 bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
3644 static const unsigned OpcTable[2][2] = {
3645 {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
3646 {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
3647 unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
3648
3649 // For FP splats, we need to widen the scalar reg via undef too.
3650 if (IsFP) {
3651 MachineInstr *Widen = emitScalarToVector(
3652 EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
3653 if (!Widen)
3654 return false;
3655 ScalarReg = Widen->getOperand(0).getReg();
3656 }
3657 auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
3658 if (IsFP)
3659 Dup.addImm(0);
3660 constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
3661 I.eraseFromParent();
3662 return true;
3663}
3664
3665bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
3666 if (TM.getOptLevel() == CodeGenOpt::None)
3667 return false;
3668 if (tryOptVectorDup(I))
3669 return true;
3670 return false;
3671}
3672
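// Select G_SHUFFLE_VECTOR. Splats are peeled off to DUP first (see
// tryOptVectorShuffle); everything else is lowered to a TBL lookup whose
// byte-index vector is materialized from the constant pool: TBL1 over a
// concatenation of the sources for 64-bit results, TBL2 over a Q-register
// pair for 128-bit results.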
Amara Emerson1abe05c2019-02-21 20:20:16 +00003673bool AArch64InstructionSelector::selectShuffleVector(
3674 MachineInstr &I, MachineRegisterInfo &MRI) const {
Amara Emerson761ca2e2019-03-19 21:43:05 +00003675 if (tryOptVectorShuffle(I))
3676 return true;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003677 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003678 Register Src1Reg = I.getOperand(1).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003679 const LLT Src1Ty = MRI.getType(Src1Reg);
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003680 Register Src2Reg = I.getOperand(2).getReg();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003681 const LLT Src2Ty = MRI.getType(Src2Reg);
Matt Arsenault5af9cf02019-08-13 15:34:38 +00003682 const Constant *ShuffleMask = I.getOperand(3).getShuffleMask();
Amara Emerson1abe05c2019-02-21 20:20:16 +00003683
3684 MachineBasicBlock &MBB = *I.getParent();
3685 MachineFunction &MF = *MBB.getParent();
3686 LLVMContext &Ctx = MF.getFunction().getContext();
3687
Matt Arsenault5af9cf02019-08-13 15:34:38 +00003688 SmallVector<int, 8> Mask;
3689 ShuffleVectorInst::getShuffleMask(ShuffleMask, Mask);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003690
3691 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
3692  // it originated from a <1 x T> type. Those should have been lowered into
3693 // G_BUILD_VECTOR earlier.
3694 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
3695 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
3696 return false;
3697 }
3698
3699 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
3700
3701 SmallVector<Constant *, 64> CstIdxs;
Matt Arsenault5af9cf02019-08-13 15:34:38 +00003702 for (int Val : Mask) {
Amara Emerson2806fd02019-04-12 21:31:21 +00003703    // For now, we'll just assume any undef indexes are 0. This should be
3704    // optimized in the future, e.g. to select DUP etc.
Matt Arsenault5af9cf02019-08-13 15:34:38 +00003705 Val = Val < 0 ? 0 : Val;
Amara Emerson1abe05c2019-02-21 20:20:16 +00003706 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
3707 unsigned Offset = Byte + Val * BytesPerElt;
3708 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
3709 }
3710 }
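  // E.g. a <4 x s32> shuffle with mask <1, 0, 3, 2> expands to the byte
  // indices {4..7, 0..3, 12..15, 8..11}, which TBL then uses to permute the
  // source bytes.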
3711
Amara Emerson8acb0d92019-03-04 19:16:00 +00003712 MachineIRBuilder MIRBuilder(I);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003713
3714 // Use a constant pool to load the index vector for TBL.
3715 Constant *CPVal = ConstantVector::get(CstIdxs);
Amara Emerson1abe05c2019-02-21 20:20:16 +00003716 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
3717 if (!IndexLoad) {
3718    LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
3719 return false;
3720 }
3721
Amara Emerson8acb0d92019-03-04 19:16:00 +00003722 if (DstTy.getSizeInBits() != 128) {
3723 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
3724 // This case can be done with TBL1.
Amara Emerson2ff22982019-03-14 22:48:15 +00003725 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003726 if (!Concat) {
3727      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
3728 return false;
3729 }
3730
3731    // The constant pool load will be 64 bits, so convert it to an FPR128 reg.
3732 IndexLoad =
3733 emitScalarToVector(64, &AArch64::FPR128RegClass,
3734 IndexLoad->getOperand(0).getReg(), MIRBuilder);
3735
3736 auto TBL1 = MIRBuilder.buildInstr(
3737 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
3738 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
3739 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
3740
Amara Emerson3739a202019-03-15 21:59:50 +00003741 auto Copy =
Amara Emerson86271782019-03-18 19:20:10 +00003742 MIRBuilder
3743 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
3744 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
Amara Emerson8acb0d92019-03-04 19:16:00 +00003745 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
3746 I.eraseFromParent();
3747 return true;
3748 }
3749
Amara Emerson1abe05c2019-02-21 20:20:16 +00003750 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
3751 // Q registers for regalloc.
3752 auto RegSeq = MIRBuilder
3753 .buildInstr(TargetOpcode::REG_SEQUENCE,
3754 {&AArch64::QQRegClass}, {Src1Reg})
3755 .addImm(AArch64::qsub0)
3756 .addUse(Src2Reg)
3757 .addImm(AArch64::qsub1);
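  // The table operand of TBLv16i8Two must be a pair of consecutive Q
  // registers, which is what the qsub0/qsub1 indices of the QQ class express.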
3758
3759 auto TBL2 =
3760 MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
3761 {RegSeq, IndexLoad->getOperand(0).getReg()});
3762 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
3763 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
3764 I.eraseFromParent();
3765 return true;
3766}
3767
Jessica Paquette16d67a32019-03-13 23:22:23 +00003768MachineInstr *AArch64InstructionSelector::emitLaneInsert(
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003769 Optional<Register> DstReg, Register SrcReg, Register EltReg,
Jessica Paquette16d67a32019-03-13 23:22:23 +00003770 unsigned LaneIdx, const RegisterBank &RB,
3771 MachineIRBuilder &MIRBuilder) const {
3772 MachineInstr *InsElt = nullptr;
3773 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
3774 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3775
3776 // Create a register to define with the insert if one wasn't passed in.
3777 if (!DstReg)
3778 DstReg = MRI.createVirtualRegister(DstRC);
3779
3780 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
3781 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
3782
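  // FPR elements use the vector INS form (copy lane 0 of the widened element
  // into lane LaneIdx of SrcReg); GPR elements use the GPR-to-lane INS form.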
3783 if (RB.getID() == AArch64::FPRRegBankID) {
3784 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
3785 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3786 .addImm(LaneIdx)
3787 .addUse(InsSub->getOperand(0).getReg())
3788 .addImm(0);
3789 } else {
3790 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
3791 .addImm(LaneIdx)
3792 .addUse(EltReg);
3793 }
3794
3795 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3796 return InsElt;
3797}
3798
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003799bool AArch64InstructionSelector::selectInsertElt(
3800 MachineInstr &I, MachineRegisterInfo &MRI) const {
3801 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
3802
3803 // Get information on the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003804 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003805 const LLT DstTy = MRI.getType(DstReg);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003806 unsigned VecSize = DstTy.getSizeInBits();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003807
3808 // Get information on the element we want to insert into the destination.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003809 Register EltReg = I.getOperand(2).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003810 const LLT EltTy = MRI.getType(EltReg);
3811 unsigned EltSize = EltTy.getSizeInBits();
3812 if (EltSize < 16 || EltSize > 64)
3813 return false; // Don't support all element types yet.
3814
3815 // Find the definition of the index. Bail out if it's not defined by a
3816 // G_CONSTANT.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003817 Register IdxReg = I.getOperand(3).getReg();
Jessica Paquette76f64b62019-04-26 21:53:13 +00003818 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
3819 if (!VRegAndVal)
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003820 return false;
Jessica Paquette76f64b62019-04-26 21:53:13 +00003821 unsigned LaneIdx = VRegAndVal->Value;
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003822
3823 // Perform the lane insert.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003824 Register SrcReg = I.getOperand(1).getReg();
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003825 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
3826 MachineIRBuilder MIRBuilder(I);
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003827
3828 if (VecSize < 128) {
3829 // If the vector we're inserting into is smaller than 128 bits, widen it
3830 // to 128 to do the insert.
3831 MachineInstr *ScalarToVec = emitScalarToVector(
3832 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
3833 if (!ScalarToVec)
3834 return false;
3835 SrcReg = ScalarToVec->getOperand(0).getReg();
3836 }
3837
3838 // Create an insert into a new FPR128 register.
3839 // Note that if our vector is already 128 bits, we end up emitting an extra
3840 // register.
3841 MachineInstr *InsMI =
3842 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
3843
3844 if (VecSize < 128) {
3845 // If we had to widen to perform the insert, then we have to demote back to
3846 // the original size to get the result we want.
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003847 Register DemoteVec = InsMI->getOperand(0).getReg();
Jessica Paquetted3ffd472019-03-29 21:39:36 +00003848 const TargetRegisterClass *RC =
3849 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
3850 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3851 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3852 return false;
3853 }
3854 unsigned SubReg = 0;
3855 if (!getSubRegForClass(RC, TRI, SubReg))
3856 return false;
3857 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3858 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
3859                      << ")\n");
3860 return false;
3861 }
3862 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3863 .addReg(DemoteVec, 0, SubReg);
3864 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3865 } else {
3866 // No widening needed.
3867 InsMI->getOperand(0).setReg(DstReg);
3868 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3869 }
3870
Jessica Paquette5aff1f42019-03-14 18:01:30 +00003871 I.eraseFromParent();
3872 return true;
3873}
3874
Amara Emerson5ec14602018-12-10 18:44:58 +00003875bool AArch64InstructionSelector::selectBuildVector(
3876 MachineInstr &I, MachineRegisterInfo &MRI) const {
3877 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3878 // Until we port more of the optimized selections, for now just use a vector
3879 // insert sequence.
3880 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3881 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
3882 unsigned EltSize = EltTy.getSizeInBits();
Jessica Paquette245047d2019-01-24 22:00:41 +00003883 if (EltSize < 16 || EltSize > 64)
Amara Emerson5ec14602018-12-10 18:44:58 +00003884 return false; // Don't support all element types yet.
3885 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003886 MachineIRBuilder MIRBuilder(I);
Jessica Paquette245047d2019-01-24 22:00:41 +00003887
3888 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003889 MachineInstr *ScalarToVec =
Amara Emerson8acb0d92019-03-04 19:16:00 +00003890 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
3891 I.getOperand(1).getReg(), MIRBuilder);
Amara Emerson6bcfa1c2019-02-25 18:52:54 +00003892 if (!ScalarToVec)
Jessica Paquette245047d2019-01-24 22:00:41 +00003893 return false;
3894
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003895 Register DstVec = ScalarToVec->getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003896 unsigned DstSize = DstTy.getSizeInBits();
3897
3898 // Keep track of the last MI we inserted. Later on, we might be able to save
3899 // a copy using it.
3900 MachineInstr *PrevMI = nullptr;
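  // Operand 1 already occupies lane 0 via the scalar-to-vector copy above, so
  // start inserting from operand 2 into lane 1 and upwards.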
3901 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
Jessica Paquette16d67a32019-03-13 23:22:23 +00003902 // Note that if we don't do a subregister copy, we can end up making an
3903 // extra register.
3904 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
3905 MIRBuilder);
3906 DstVec = PrevMI->getOperand(0).getReg();
Amara Emerson5ec14602018-12-10 18:44:58 +00003907 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003908
3909 // If DstTy's size in bits is less than 128, then emit a subregister copy
3910 // from DstVec to the last register we've defined.
3911 if (DstSize < 128) {
Jessica Paquette85ace622019-03-13 23:29:54 +00003912 // Force this to be FPR using the destination vector.
3913 const TargetRegisterClass *RC =
3914 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
Jessica Paquette245047d2019-01-24 22:00:41 +00003915 if (!RC)
3916 return false;
Jessica Paquette85ace622019-03-13 23:29:54 +00003917 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3918 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3919 return false;
3920 }
3921
3922 unsigned SubReg = 0;
3923 if (!getSubRegForClass(RC, TRI, SubReg))
3924 return false;
3925 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3926 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
3927                      << ")\n");
3928 return false;
3929 }
Jessica Paquette245047d2019-01-24 22:00:41 +00003930
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00003931 Register Reg = MRI.createVirtualRegister(RC);
3932 Register DstReg = I.getOperand(0).getReg();
Jessica Paquette245047d2019-01-24 22:00:41 +00003933
Amara Emerson86271782019-03-18 19:20:10 +00003934 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3935 .addReg(DstVec, 0, SubReg);
Jessica Paquette245047d2019-01-24 22:00:41 +00003936 MachineOperand &RegOp = I.getOperand(1);
3937 RegOp.setReg(Reg);
3938 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3939 } else {
3940 // We don't need a subregister copy. Save a copy by re-using the
3941 // destination register on the final insert.
3942 assert(PrevMI && "PrevMI was null?");
3943 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
3944 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
3945 }
3946
Amara Emerson5ec14602018-12-10 18:44:58 +00003947 I.eraseFromParent();
3948 return true;
3949}
3950
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003951/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
3952/// ID if it exists, and 0 otherwise.
3953static unsigned findIntrinsicID(MachineInstr &I) {
3954 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
3955 return Op.isIntrinsicID();
3956 });
3957 if (IntrinOp == I.operands_end())
3958 return 0;
3959 return IntrinOp->getIntrinsicID();
3960}
3961
Jessica Paquette22c62152019-04-02 19:57:26 +00003962/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
3963/// intrinsic.
3964static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
3965 switch (NumBytesToStore) {
Jessica Paquetteaa8b9992019-07-26 23:28:53 +00003966 // TODO: 1 and 2 byte stores
3967 case 4:
3968 return AArch64::STLXRW;
Jessica Paquette22c62152019-04-02 19:57:26 +00003969 case 8:
3970 return AArch64::STLXRX;
3971 default:
3972 LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
3973 << NumBytesToStore << ")\n");
3974 break;
3975 }
3976 return 0;
3977}
3978
3979bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
3980 MachineInstr &I, MachineRegisterInfo &MRI) const {
3981 // Find the intrinsic ID.
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00003982 unsigned IntrinID = findIntrinsicID(I);
3983 if (!IntrinID)
Jessica Paquette22c62152019-04-02 19:57:26 +00003984 return false;
Jessica Paquette22c62152019-04-02 19:57:26 +00003985 MachineIRBuilder MIRBuilder(I);
3986
3987 // Select the instruction.
3988 switch (IntrinID) {
3989 default:
3990 return false;
3991 case Intrinsic::trap:
3992 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
3993 break;
Tom Tan7ecb5142019-06-21 23:38:05 +00003994 case Intrinsic::debugtrap:
3995 if (!STI.isTargetWindows())
3996 return false;
3997 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
3998 break;
Jessica Paquette22c62152019-04-02 19:57:26 +00003999 case Intrinsic::aarch64_stlxr:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00004000 Register StatReg = I.getOperand(0).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00004001 assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
4002 "Status register must be 32 bits!");
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00004003 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00004004
4005 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
4006 LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
4007 return false;
4008 }
4009
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00004010 Register PtrReg = I.getOperand(3).getReg();
Jessica Paquette22c62152019-04-02 19:57:26 +00004011 assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
4012
4013 // Expect only one memory operand.
4014 if (!I.hasOneMemOperand())
4015 return false;
4016
4017 const MachineMemOperand *MemOp = *I.memoperands_begin();
4018 unsigned NumBytesToStore = MemOp->getSize();
4019 unsigned Opc = getStlxrOpcode(NumBytesToStore);
4020 if (!Opc)
4021 return false;
Jessica Paquetteaa8b9992019-07-26 23:28:53 +00004022 unsigned NumBitsToStore = NumBytesToStore * 8;
4023 if (NumBitsToStore != 64) {
4024 // The intrinsic always has a 64-bit source, but we might actually want
4025 // a differently-sized source for the instruction. Try to get it.
4026 // TODO: For 1 and 2-byte stores, this will have a G_AND. For now, let's
4027 // just handle 4-byte stores.
4028 // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
4029 // to the right size for the STLXR.
4030 MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
4031 if (!Zext)
4032 return false;
4033 SrcReg = Zext->getOperand(1).getReg();
4034 // We should get an appropriately-sized register here.
4035 if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
4036 return false;
4037 }
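    // E.g. for a 4-byte store the generic MIR is roughly
    //   %ext:gpr(s64) = G_ZEXT %val(s32)
    //   %status = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %ext, %ptr
    // and we emit an STLXRW that stores %val directly.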
4038 auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
4039 .addMemOperand(*I.memoperands_begin());
Jessica Paquette22c62152019-04-02 19:57:26 +00004040 constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
4041 }
4042
4043 I.eraseFromParent();
4044 return true;
4045}
4046
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00004047bool AArch64InstructionSelector::selectIntrinsic(
4048 MachineInstr &I, MachineRegisterInfo &MRI) const {
4049 unsigned IntrinID = findIntrinsicID(I);
4050 if (!IntrinID)
4051 return false;
4052 MachineIRBuilder MIRBuilder(I);
4053
4054 switch (IntrinID) {
4055 default:
4056 break;
4057 case Intrinsic::aarch64_crypto_sha1h:
Matt Arsenaultfaeaedf2019-06-24 16:16:12 +00004058 Register DstReg = I.getOperand(0).getReg();
4059 Register SrcReg = I.getOperand(2).getReg();
Jessica Paquette7f6fe7c2019-04-29 20:58:17 +00004060
4061 // FIXME: Should this be an assert?
4062 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4063 MRI.getType(SrcReg).getSizeInBits() != 32)
4064 return false;
4065
4066 // The operation has to happen on FPRs. Set up some new FPR registers for
4067 // the source and destination if they are on GPRs.
4068 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4069 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4070 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4071
4072 // Make sure the copy ends up getting constrained properly.
4073 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4074 AArch64::GPR32RegClass, MRI);
4075 }
4076
4077 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4078 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4079
4080 // Actually insert the instruction.
4081 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4082 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4083
4084 // Did we create a new register for the destination?
4085 if (DstReg != I.getOperand(0).getReg()) {
4086 // Yep. Copy the result of the instruction back into the original
4087 // destination.
4088 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4089 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4090 AArch64::GPR32RegClass, MRI);
4091 }
4092
4093 I.eraseFromParent();
4094 return true;
4095 }
4096 return false;
4097}
4098
Amara Emersoncac11512019-07-03 01:49:06 +00004099static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
4100 auto &MI = *Root.getParent();
4101 auto &MBB = *MI.getParent();
4102 auto &MF = *MBB.getParent();
4103 auto &MRI = MF.getRegInfo();
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004104 uint64_t Immed;
4105 if (Root.isImm())
4106 Immed = Root.getImm();
4107 else if (Root.isCImm())
4108 Immed = Root.getCImm()->getZExtValue();
4109 else if (Root.isReg()) {
Jessica Paquettea99cfee2019-07-03 17:46:23 +00004110 auto ValAndVReg =
4111 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
4112 if (!ValAndVReg)
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004113 return None;
Jessica Paquettea99cfee2019-07-03 17:46:23 +00004114 Immed = ValAndVReg->Value;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004115 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004116 return None;
Amara Emersoncac11512019-07-03 01:49:06 +00004117 return Immed;
4118}
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004119
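/// The selectShiftA_* and selectShiftB_* renderers produce the two immediates
/// of a bitfield-move encoding of a constant shift: "A" is (bitsize - shift)
/// wrapped to the register width and "B" is (bitsize - 1 - shift), analogous
/// to the shift_a/shift_b transforms used on the SelectionDAG path.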
Amara Emersoncac11512019-07-03 01:49:06 +00004120InstructionSelector::ComplexRendererFns
4121AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4122 auto MaybeImmed = getImmedFromMO(Root);
4123 if (MaybeImmed == None || *MaybeImmed > 31)
4124 return None;
4125 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4126 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4127}
4128
4129InstructionSelector::ComplexRendererFns
4130AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4131 auto MaybeImmed = getImmedFromMO(Root);
4132 if (MaybeImmed == None || *MaybeImmed > 31)
4133 return None;
4134 uint64_t Enc = 31 - *MaybeImmed;
4135 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4136}
4137
4138InstructionSelector::ComplexRendererFns
4139AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4140 auto MaybeImmed = getImmedFromMO(Root);
4141 if (MaybeImmed == None || *MaybeImmed > 63)
4142 return None;
4143 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4144 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4145}
4146
4147InstructionSelector::ComplexRendererFns
4148AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4149 auto MaybeImmed = getImmedFromMO(Root);
4150 if (MaybeImmed == None || *MaybeImmed > 63)
4151 return None;
4152 uint64_t Enc = 63 - *MaybeImmed;
4153 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4154}
4155
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004156/// Helper to select an immediate value that can be represented as a 12-bit
4157/// value shifted left by either 0 or 12. If it is possible to do so, return
4158/// the immediate and shift value. If not, return None.
4159///
4160/// Used by selectArithImmed and selectNegArithImmed.
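/// E.g. 0x123 selects (0x123, LSL #0) and 0x123000 selects (0x123, LSL #12).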
Amara Emersoncac11512019-07-03 01:49:06 +00004161InstructionSelector::ComplexRendererFns
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004162AArch64InstructionSelector::select12BitValueWithLeftShift(
4163 uint64_t Immed) const {
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004164 unsigned ShiftAmt;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004165 if (Immed >> 12 == 0) {
4166 ShiftAmt = 0;
4167 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4168 ShiftAmt = 12;
4169 Immed = Immed >> 12;
4170 } else
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004171 return None;
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004172
4173 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
Daniel Sandersdf39cba2017-10-15 18:22:54 +00004174 return {{
4175 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4176 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4177 }};
Daniel Sanders8a4bae92017-03-14 21:32:08 +00004178}
Daniel Sanders0b5293f2017-04-06 09:49:34 +00004179
Jessica Paquettee4c46c32019-08-02 18:12:53 +00004180/// SelectArithImmed - Select an immediate value that can be represented as
4181/// a 12-bit value shifted left by either 0 or 12. If so, return renderers
4182/// for the 12-bit value and the shifter operand.
4183InstructionSelector::ComplexRendererFns
4184AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4185 // This function is called from the addsub_shifted_imm ComplexPattern,
4186  // which lists [imm] as the list of opcodes it's interested in; however,
4187 // we still need to check whether the operand is actually an immediate
4188 // here because the ComplexPattern opcode list is only used in
4189 // root-level opcode matching.
4190 auto MaybeImmed = getImmedFromMO(Root);
4191 if (MaybeImmed == None)
4192 return None;
4193 return select12BitValueWithLeftShift(*MaybeImmed);
4194}
4195
4196/// SelectNegArithImmed - As above, but negates the value before trying to
4197/// select it.
4198InstructionSelector::ComplexRendererFns
4199AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4200 // We need a register here, because we need to know if we have a 64 or 32
4201 // bit immediate.
4202 if (!Root.isReg())
4203 return None;
4204 auto MaybeImmed = getImmedFromMO(Root);
4205 if (MaybeImmed == None)
4206 return None;
4207 uint64_t Immed = *MaybeImmed;
4208
4209 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4210 // have the opposite effect on the C flag, so this pattern mustn't match under
4211 // those circumstances.
4212 if (Immed == 0)
4213 return None;
4214
4215 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4216 // the root.
4217 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4218 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4219 Immed = ~((uint32_t)Immed) + 1;
4220 else
4221 Immed = ~Immed + 1ULL;
4222
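  // After negation the value must still fit in 24 bits (a 12-bit immediate,
  // optionally shifted left by 12) for select12BitValueWithLeftShift to
  // succeed.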
4223 if (Immed & 0xFFFFFFFFFF000000ULL)
4224 return None;
4225
4226 Immed &= 0xFFFFFFULL;
4227 return select12BitValueWithLeftShift(Immed);
4228}
4229
Jessica Paquette2b404d02019-07-23 16:09:42 +00004230/// Return true if it is worth folding MI into an extended register. That is,
4231/// if it's safe to pull it into the addressing mode of a load or store as a
4232/// shift.
4233bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4234 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4235 // Always fold if there is one use, or if we're optimizing for size.
4236 Register DefReg = MI.getOperand(0).getReg();
4237 if (MRI.hasOneUse(DefReg) ||
4238 MI.getParent()->getParent()->getFunction().hasMinSize())
4239 return true;
4240
4241 // It's better to avoid folding and recomputing shifts when we don't have a
4242 // fastpath.
4243 if (!STI.hasLSLFast())
4244 return false;
4245
4246 // We have a fastpath, so folding a shift in and potentially computing it
4247 // many times may be beneficial. Check if this is only used in memory ops.
4248 // If it is, then we should fold.
4249 return all_of(MRI.use_instructions(DefReg),
4250 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4251}
4252
4253/// This is used for computing addresses like this:
4254///
4255/// ldr x1, [x2, x3, lsl #3]
4256///
4257/// Where x2 is the base register, and x3 is an offset register. The shift-left
4258/// is a constant value specific to this load instruction. That is, we'll never
4259/// see anything other than a 3 here (which corresponds to the size of the
4260/// element being loaded).
4261InstructionSelector::ComplexRendererFns
4262AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
4263 MachineOperand &Root, unsigned SizeInBytes) const {
4264 if (!Root.isReg())
4265 return None;
4266 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4267
4268 // Make sure that the memory op is a valid size.
4269 int64_t LegalShiftVal = Log2_32(SizeInBytes);
4270 if (LegalShiftVal == 0)
4271 return None;
4272
4273 // We want to find something like this:
4274 //
4275 // val = G_CONSTANT LegalShiftVal
4276 // shift = G_SHL off_reg val
4277 // ptr = G_GEP base_reg shift
4278 // x = G_LOAD ptr
4279 //
4280 // And fold it into this addressing mode:
4281 //
4282 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
4283
4284 // Check if we can find the G_GEP.
4285 MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_GEP, Root.getReg(), MRI);
4286 if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
4287 return None;
4288
Jessica Paquette68499112019-07-24 22:49:42 +00004289 // Now, try to match an opcode which will match our specific offset.
4290 // We want a G_SHL or a G_MUL.
4291 MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
4292 if (!OffsetInst)
Jessica Paquette2b404d02019-07-23 16:09:42 +00004293 return None;
4294
Jessica Paquette68499112019-07-24 22:49:42 +00004295 unsigned OffsetOpc = OffsetInst->getOpcode();
4296 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
Jessica Paquette2b404d02019-07-23 16:09:42 +00004297 return None;
4298
Jessica Paquette68499112019-07-24 22:49:42 +00004299 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4300 return None;
4301
4302 // Now, try to find the specific G_CONSTANT. Start by assuming that the
4303 // register we will offset is the LHS, and the register containing the
4304 // constant is the RHS.
4305 Register OffsetReg = OffsetInst->getOperand(1).getReg();
4306 Register ConstantReg = OffsetInst->getOperand(2).getReg();
4307 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4308 if (!ValAndVReg) {
4309 // We didn't get a constant on the RHS. If the opcode is a shift, then
4310 // we're done.
4311 if (OffsetOpc == TargetOpcode::G_SHL)
4312 return None;
4313
4314 // If we have a G_MUL, we can use either register. Try looking at the RHS.
4315 std::swap(OffsetReg, ConstantReg);
4316 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4317 if (!ValAndVReg)
4318 return None;
4319 }
4320
Jessica Paquette2b404d02019-07-23 16:09:42 +00004321 // The value must fit into 3 bits, and must be positive. Make sure that is
4322 // true.
4323 int64_t ImmVal = ValAndVReg->Value;
Jessica Paquette68499112019-07-24 22:49:42 +00004324
4325 // Since we're going to pull this into a shift, the constant value must be
4326 // a power of 2. If we got a multiply, then we need to check this.
4327 if (OffsetOpc == TargetOpcode::G_MUL) {
4328 if (!isPowerOf2_32(ImmVal))
4329 return None;
4330
4331 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4332 ImmVal = Log2_32(ImmVal);
4333 }
4334
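  // E.g. for an 8-byte access, an offset computed as (reg * 8) or (reg << 3)
  // can be folded into the addressing mode as [base, reg, lsl #3].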
Jessica Paquette2b404d02019-07-23 16:09:42 +00004335 if ((ImmVal & 0x7) != ImmVal)
4336 return None;
4337
4338 // We are only allowed to shift by LegalShiftVal. This shift value is built
4339 // into the instruction, so we can't just use whatever we want.
4340 if (ImmVal != LegalShiftVal)
4341 return None;
4342
4343  // We can use the LHS of the GEP as the base, and OffsetReg as the offset.
4344  // Signify that we are shifting by setting the shift flag to 1.
4345 return {{
4346 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
Jessica Paquette68499112019-07-24 22:49:42 +00004347 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
Jessica Paquette2b404d02019-07-23 16:09:42 +00004348 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4349 [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4350 }};
4351}
4352
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00004353/// This is used for computing addresses like this:
4354///
4355/// ldr x1, [x2, x3]
4356///
4357/// Where x2 is the base register, and x3 is an offset register.
4358///
4359/// When possible (or profitable) to fold a G_GEP into the address calculation,
4360/// this will do so. Otherwise, it will return None.
4361InstructionSelector::ComplexRendererFns
4362AArch64InstructionSelector::selectAddrModeRegisterOffset(
4363 MachineOperand &Root) const {
4364 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4365
Jessica Paquette7a1dcc52019-07-18 21:50:11 +00004366 // We need a GEP.
4367 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
4368 if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
4369 return None;
4370
4371 // If this is used more than once, let's not bother folding.
4372 // TODO: Check if they are memory ops. If they are, then we can still fold
4373 // without having to recompute anything.
4374 if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
4375 return None;
4376
4377 // Base is the GEP's LHS, offset is its RHS.
4378 return {{
4379 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4380 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4381 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4382 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4383 }};
4384}
4385
Jessica Paquette2b404d02019-07-23 16:09:42 +00004386/// This is intended to be equivalent to selectAddrModeXRO in
4387/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
4388InstructionSelector::ComplexRendererFns
4389AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
4390 unsigned SizeInBytes) const {
4391 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4392
4393 // If we have a constant offset, then we probably don't want to match a
4394 // register offset.
4395 if (isBaseWithConstantOffset(Root, MRI))
4396 return None;
4397
4398 // Try to fold shifts into the addressing mode.
4399 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
4400 if (AddrModeFns)
4401 return AddrModeFns;
4402
4403 // If that doesn't work, see if it's possible to fold in registers from
4404 // a GEP.
4405 return selectAddrModeRegisterOffset(Root);
4406}
4407
Daniel Sandersea8711b2017-10-16 03:36:29 +00004408/// Select a "register plus unscaled signed 9-bit immediate" address. This
4409/// should only match when there is an offset that is not valid for a scaled
4410/// immediate addressing mode. The "Size" argument is the size in bytes of the
4411/// memory reference, which is needed here to know what is valid for a scaled
4412/// immediate.
Daniel Sanders1e4569f2017-10-20 20:55:29 +00004413InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00004414AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
4415 unsigned Size) const {
4416 MachineRegisterInfo &MRI =
4417 Root.getParent()->getParent()->getParent()->getRegInfo();
4418
4419 if (!Root.isReg())
4420 return None;
4421
4422 if (!isBaseWithConstantOffset(Root, MRI))
4423 return None;
4424
4425 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4426 if (!RootDef)
4427 return None;
4428
4429 MachineOperand &OffImm = RootDef->getOperand(2);
4430 if (!OffImm.isReg())
4431 return None;
4432 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
4433 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
4434 return None;
4435 int64_t RHSC;
4436 MachineOperand &RHSOp1 = RHS->getOperand(1);
4437 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
4438 return None;
4439 RHSC = RHSOp1.getCImm()->getSExtValue();
4440
4441 // If the offset is valid as a scaled immediate, don't match here.
4442 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
4443 return None;
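  // Unscaled addressing accepts any signed 9-bit byte offset, e.g. a -16
  // offset that the scaled uimm12 forms cannot encode.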
4444 if (RHSC >= -256 && RHSC < 256) {
4445 MachineOperand &Base = RootDef->getOperand(1);
4446 return {{
4447 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
4448 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
4449 }};
4450 }
4451 return None;
4452}
4453
4454/// Select a "register plus scaled unsigned 12-bit immediate" address. The
4455/// "Size" argument is the size in bytes of the memory reference, which
4456/// determines the scale.
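/// E.g. for a 4-byte access the encoded immediate is the byte offset divided
/// by 4 and must lie in the range [0, 4095].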
Daniel Sanders1e4569f2017-10-20 20:55:29 +00004457InstructionSelector::ComplexRendererFns
Daniel Sandersea8711b2017-10-16 03:36:29 +00004458AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
4459 unsigned Size) const {
4460 MachineRegisterInfo &MRI =
4461 Root.getParent()->getParent()->getParent()->getRegInfo();
4462
4463 if (!Root.isReg())
4464 return None;
4465
4466 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
4467 if (!RootDef)
4468 return None;
4469
4470 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
4471 return {{
4472 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
4473 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4474 }};
4475 }
4476
4477 if (isBaseWithConstantOffset(Root, MRI)) {
4478 MachineOperand &LHS = RootDef->getOperand(1);
4479 MachineOperand &RHS = RootDef->getOperand(2);
4480 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
4481 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
4482 if (LHSDef && RHSDef) {
4483 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
4484 unsigned Scale = Log2_32(Size);
4485 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
4486 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
Daniel Sanders01805b62017-10-16 05:39:30 +00004487 return {{
4488 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
4489 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4490 }};
4491
Daniel Sandersea8711b2017-10-16 03:36:29 +00004492 return {{
4493 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
4494 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
4495 }};
4496 }
4497 }
4498 }
4499
4500 // Before falling back to our general case, check if the unscaled
4501 // instructions can handle this. If so, that's preferable.
4502 if (selectAddrModeUnscaled(Root, Size).hasValue())
4503 return None;
4504
4505 return {{
4506 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
4507 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4508 }};
4509}
4510
Jessica Paquette9a95e792019-08-20 22:18:06 +00004511/// Given a shift instruction, return the correct shift type for that
4512/// instruction.
4513static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
4514 // TODO: Handle AArch64_AM::ROR
4515 switch (MI.getOpcode()) {
4516 default:
4517 return AArch64_AM::InvalidShiftExtend;
4518 case TargetOpcode::G_SHL:
4519 return AArch64_AM::LSL;
4520 case TargetOpcode::G_LSHR:
4521 return AArch64_AM::LSR;
4522 case TargetOpcode::G_ASHR:
4523 return AArch64_AM::ASR;
4524 }
4525}
4526
4527/// Select a "shifted register" operand. If the value is not shifted, set the
4528/// shift operand to a default value of "lsl 0".
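/// E.g. a G_SHL feeding the second operand of a G_ADD can be folded so that
/// the add is emitted as "add x0, x1, x2, lsl #3".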
4529///
4530/// TODO: Allow shifted register to be rotated in logical instructions.
4531InstructionSelector::ComplexRendererFns
4532AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
4533 if (!Root.isReg())
4534 return None;
4535 MachineRegisterInfo &MRI =
4536 Root.getParent()->getParent()->getParent()->getRegInfo();
4537
4538 // Check if the operand is defined by an instruction which corresponds to
4539 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
4540 //
4541 // TODO: Handle AArch64_AM::ROR for logical instructions.
4542 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
4543 if (!ShiftInst)
4544 return None;
4545 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
4546 if (ShType == AArch64_AM::InvalidShiftExtend)
4547 return None;
4548 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
4549 return None;
4550
4551 // Need an immediate on the RHS.
4552 MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
4553 auto Immed = getImmedFromMO(ShiftRHS);
4554 if (!Immed)
4555 return None;
4556
4557 // We have something that we can fold. Fold in the shift's LHS and RHS into
4558 // the instruction.
4559 MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
4560 Register ShiftReg = ShiftLHS.getReg();
4561
4562 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
4563 unsigned Val = *Immed & (NumBits - 1);
4564 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
4565
4566 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
4567 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
4568}
4569
Volkan Kelesf7f25682018-01-16 18:44:05 +00004570void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
4571 const MachineInstr &MI) const {
4572 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4573 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4574 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
4575 assert(CstVal && "Expected constant value");
4576 MIB.addImm(CstVal.getValue());
4577}
4578
Jessica Paquettee6c299b2019-08-20 22:31:25 +00004579void AArch64InstructionSelector::renderLogicalImm32(
4580 MachineInstrBuilder &MIB, const MachineInstr &I) const {
4581 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4582 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4583 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
4584 MIB.addImm(Enc);
4585}
4586
4587void AArch64InstructionSelector::renderLogicalImm64(
4588 MachineInstrBuilder &MIB, const MachineInstr &I) const {
4589 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
4590 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
4591 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
4592 MIB.addImm(Enc);
4593}
4594
Daniel Sanders0b5293f2017-04-06 09:49:34 +00004595namespace llvm {
4596InstructionSelector *
4597createAArch64InstructionSelector(const AArch64TargetMachine &TM,
4598 AArch64Subtarget &Subtarget,
4599 AArch64RegisterBankInfo &RBI) {
4600 return new AArch64InstructionSelector(TM, Subtarget, RBI);
4601}
4602}