blob: 1bf882b6682452ccb0392cd42ec665cd5dc4b3b5 [file] [log] [blame]
Eugene Zelenko96d933d2017-07-25 23:51:02 +00001//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
Tim Northover3b0846e2014-05-24 12:50:23 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Tim Northover3b0846e2014-05-24 12:50:23 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that performs load / store related peephole
10// optimizations. This pass should be run after register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64InstrInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +000015#include "AArch64Subtarget.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000016#include "MCTargetDesc/AArch64AddressingModes.h"
17#include "llvm/ADT/BitVector.h"
Chad Rosierce8e5ab2015-05-21 21:36:46 +000018#include "llvm/ADT/SmallVector.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +000019#include "llvm/ADT/Statistic.h"
Eugene Zelenko11f69072017-01-25 00:29:26 +000020#include "llvm/ADT/StringRef.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000021#include "llvm/ADT/iterator_range.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000022#include "llvm/Analysis/AliasAnalysis.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000023#include "llvm/CodeGen/MachineBasicBlock.h"
Eugene Zelenko11f69072017-01-25 00:29:26 +000024#include "llvm/CodeGen/MachineFunction.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000025#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstr.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko11f69072017-01-25 00:29:26 +000028#include "llvm/CodeGen/MachineOperand.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000029#include "llvm/CodeGen/TargetRegisterInfo.h"
Eugene Zelenko11f69072017-01-25 00:29:26 +000030#include "llvm/IR/DebugLoc.h"
31#include "llvm/MC/MCRegisterInfo.h"
32#include "llvm/Pass.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000033#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
Florian Hahn17554b82019-12-11 09:59:18 +000035#include "llvm/Support/DebugCounter.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000036#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/raw_ostream.h"
Eugene Zelenko11f69072017-01-25 00:29:26 +000038#include <cassert>
39#include <cstdint>
Florian Hahn17554b82019-12-11 09:59:18 +000040#include <functional>
Eugene Zelenko11f69072017-01-25 00:29:26 +000041#include <iterator>
42#include <limits>
43
Tim Northover3b0846e2014-05-24 12:50:23 +000044using namespace llvm;
45
46#define DEBUG_TYPE "aarch64-ldst-opt"
47
// Pass-level statistics, reported under -stats.
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");

// Debug counter used to bisect which store pairs are allowed to attempt
// register renaming.
DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
              "Controls which pairs are considered for renaming");

// The LdStLimit limits how far we search for load/store pairs.
static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// The UpdateLimit limits how far we search for update instructions when we form
// pre-/post-index instructions.
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
                                     cl::Hidden);

// Human-readable pass name, shared by getPassName() and INITIALIZE_PASS.
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
69
Tim Northover3b0846e2014-05-24 12:50:23 +000070namespace {
Chad Rosier96a18a92015-07-21 17:42:04 +000071
Eugene Zelenko96d933d2017-07-25 23:51:02 +000072using LdStPairFlags = struct LdStPairFlags {
Chad Rosier96a18a92015-07-21 17:42:04 +000073 // If a matching instruction is found, MergeForward is set to true if the
74 // merge is to remove the first instruction and replace the second with
75 // a pair-wise insn, and false if the reverse is true.
Eugene Zelenko11f69072017-01-25 00:29:26 +000076 bool MergeForward = false;
Chad Rosier96a18a92015-07-21 17:42:04 +000077
78 // SExtIdx gives the index of the result of the load pair that must be
79 // extended. The value of SExtIdx assumes that the paired load produces the
80 // value in this order: (I, returned iterator), i.e., -1 means no value has
81 // to be extended, 0 means I, and 1 means the returned iterator.
Eugene Zelenko11f69072017-01-25 00:29:26 +000082 int SExtIdx = -1;
Chad Rosier96a18a92015-07-21 17:42:04 +000083
Florian Hahn17554b82019-12-11 09:59:18 +000084 // If not none, RenameReg can be used to rename the result register of the
85 // first store in a pair. Currently this only works when merging stores
86 // forward.
87 Optional<MCPhysReg> RenameReg = None;
88
Eugene Zelenko11f69072017-01-25 00:29:26 +000089 LdStPairFlags() = default;
Chad Rosier96a18a92015-07-21 17:42:04 +000090
91 void setMergeForward(bool V = true) { MergeForward = V; }
92 bool getMergeForward() const { return MergeForward; }
93
94 void setSExtIdx(int V) { SExtIdx = V; }
95 int getSExtIdx() const { return SExtIdx; }
Florian Hahn17554b82019-12-11 09:59:18 +000096
97 void setRenameReg(MCPhysReg R) { RenameReg = R; }
98 void clearRenameReg() { RenameReg = None; }
99 Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000100};
Chad Rosier96a18a92015-07-21 17:42:04 +0000101
// Machine-function pass that performs the load/store peephole optimizations
// described in the file header. Runs post-RA (requires NoVRegs).
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;

  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  // Analyses/target hooks cached in runOnMachineFunction for use by the
  // helpers below.
  AliasAnalysis *AA;
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;
  // Register units defined so far in the current basic block (used by the
  // store-pair register-renaming logic).
  LiveRegUnits DefinedInBB;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Alias analysis is needed to prove loads/stores don't interfere.
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit,
                                               bool FindNarrowMerge);

  // Scan the instructions looking for a store that writes to the address from
  // which the current load instruction reads. Return true if one is found.
  bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
                         MachineBasicBlock::iterator &StoreI);

  // Merge the two narrow zero stores indicated into one wider store.
  MachineBasicBlock::iterator
  mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                        MachineBasicBlock::iterator MergeMI,
                        const LdStPairFlags &Flags);

  // Merge the two instructions indicated into a single pair-wise instruction.
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Promote the load that reads directly from the address stored to.
  MachineBasicBlock::iterator
  promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                       MachineBasicBlock::iterator StoreI);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
                                int UnscaledOffset, unsigned Limit);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge zero store instructions.
  bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

  // Find and pair ldr/str instructions.
  bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);

  // Find and promote load instructions which read directly from store.
  bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);

  // Find and merge a base register updates before or after a ld/st instruction.
  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

  // Run all of the above transformations over one basic block. Returns true
  // if any change was made.
  bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    // Post-RA pass: virtual registers must already be gone.
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
Eugene Zelenko11f69072017-01-25 00:29:26 +0000199
// Pass identification: the address of ID is the unique pass identifier.
char AArch64LoadStoreOpt::ID = 0;
Eugene Zelenko11f69072017-01-25 00:29:26 +0000201
202} // end anonymous namespace
Tim Northover3b0846e2014-05-24 12:50:23 +0000203
// Register the pass with the legacy pass manager under -aarch64-ldst-opt.
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)
206
Jun Bum Lim80ec0d32015-11-20 21:14:07 +0000207static bool isNarrowStore(unsigned Opc) {
208 switch (Opc) {
209 default:
210 return false;
211 case AArch64::STRBBui:
212 case AArch64::STURBBi:
213 case AArch64::STRHHui:
214 case AArch64::STURHHi:
215 return true;
216 }
217}
218
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +0000219// These instruction set memory tag and either keep memory contents unchanged or
220// set it to zero, ignoring the address part of the source register.
221static bool isTagStore(const MachineInstr &MI) {
222 switch (MI.getOpcode()) {
223 default:
224 return false;
225 case AArch64::STGOffset:
226 case AArch64::STZGOffset:
227 case AArch64::ST2GOffset:
228 case AArch64::STZ2GOffset:
229 return true;
230 }
231}
232
// Scaling factor for unscaled load or store, i.e. the memory access size in
// bytes for the given opcode.
static int getMemScale(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  // 1-byte accesses.
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
    return 1;
  // 2-byte accesses.
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
    return 2;
  // 4-byte accesses (W registers / single-precision FP). Note that for the
  // paired forms this is the size of one element, not the whole pair.
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  // 8-byte accesses (X registers / double-precision FP).
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  // 16-byte accesses (Q registers), and tag stores, which operate on
  // 16-byte memory granules.
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
    return 16;
  }
}
295
// Map a sign-extending load opcode to its non-extending equivalent; all
// other pairable load/store opcodes map to themselves. If IsValidLdStrOpc
// is non-null it is set to false (and max() returned) for opcodes this pass
// cannot pair at all.
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return std::numeric_limits<unsigned>::max();
  // These opcodes have no sign-extension semantics; return them unchanged.
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRBBui:
  case AArch64::STURBBi:
  case AArch64::STRHHui:
  case AArch64::STURHHi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  // Sign-extending word loads map to the plain word loads.
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
336
Jun Bum Lim1de2d442016-02-05 20:02:03 +0000337static unsigned getMatchingWideOpcode(unsigned Opc) {
338 switch (Opc) {
339 default:
340 llvm_unreachable("Opcode has no wide equivalent!");
341 case AArch64::STRBBui:
342 return AArch64::STRHHui;
343 case AArch64::STRHHui:
344 return AArch64::STRWui;
345 case AArch64::STURBBi:
346 return AArch64::STURHHi;
347 case AArch64::STURHHi:
348 return AArch64::STURWi;
Jun Bum Lim397eb7b2016-02-12 15:25:39 +0000349 case AArch64::STURWi:
350 return AArch64::STURXi;
351 case AArch64::STRWui:
352 return AArch64::STRXui;
Jun Bum Lim1de2d442016-02-05 20:02:03 +0000353 }
354}
355
// Returns the paired (LDP/STP) opcode corresponding to a single load/store
// opcode; both the scaled and unscaled single forms map to the same (scaled)
// paired form.
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
395
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000396static unsigned isMatchingStore(MachineInstr &LoadInst,
397 MachineInstr &StoreInst) {
398 unsigned LdOpc = LoadInst.getOpcode();
399 unsigned StOpc = StoreInst.getOpcode();
Jun Bum Lim6755c3b2015-12-22 16:36:16 +0000400 switch (LdOpc) {
401 default:
402 llvm_unreachable("Unsupported load instruction!");
403 case AArch64::LDRBBui:
404 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
405 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
406 case AArch64::LDURBBi:
407 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
408 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
409 case AArch64::LDRHHui:
410 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
411 StOpc == AArch64::STRXui;
412 case AArch64::LDURHHi:
413 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
414 StOpc == AArch64::STURXi;
415 case AArch64::LDRWui:
416 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
417 case AArch64::LDURWi:
418 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
419 case AArch64::LDRXui:
420 return StOpc == AArch64::STRXui;
421 case AArch64::LDURXi:
422 return StOpc == AArch64::STURXi;
423 }
424}
425
// Returns the pre-indexed (writeback) variant of the given load/store opcode.
static unsigned getPreIndexedOpcode(unsigned Opc) {
  // FIXME: We don't currently support creating pre-indexed loads/stores when
  // the load or store is the unscaled version. If we decide to perform such an
  // optimization in the future the cases for the unscaled loads/stores will
  // need to be added here.
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  // Memory-tagging stores also have pre-indexed forms.
  case AArch64::STGOffset:
    return AArch64::STGPreIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPreIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPreIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPreIndex;
  case AArch64::STGPi:
    return AArch64::STGPpre;
  }
}
498
// Returns the post-indexed (writeback) variant of the given load/store
// opcode. Unlike the pre-indexed mapping above, unscaled (STUR*/LDUR*)
// opcodes are also handled here and map to the same post-indexed form as
// their scaled counterparts.
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STRSpost;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STRDpost;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STRWpost;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  // Memory-tagging stores also have post-indexed forms.
  case AArch64::STGOffset:
    return AArch64::STGPostIndex;
  case AArch64::STZGOffset:
    return AArch64::STZGPostIndex;
  case AArch64::ST2GOffset:
    return AArch64::ST2GPostIndex;
  case AArch64::STZ2GOffset:
    return AArch64::STZ2GPostIndex;
  case AArch64::STGPi:
    return AArch64::STGPpost;
  }
}
577
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000578static bool isPairedLdSt(const MachineInstr &MI) {
579 switch (MI.getOpcode()) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000580 default:
581 return false;
582 case AArch64::LDPSi:
Chad Rosier43150122015-09-29 20:39:55 +0000583 case AArch64::LDPSWi:
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000584 case AArch64::LDPDi:
585 case AArch64::LDPQi:
586 case AArch64::LDPWi:
587 case AArch64::LDPXi:
588 case AArch64::STPSi:
589 case AArch64::STPDi:
590 case AArch64::STPQi:
591 case AArch64::STPWi:
592 case AArch64::STPXi:
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +0000593 case AArch64::STGPi:
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000594 return true;
595 }
596}
597
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +0000598// Returns the scale and offset range of pre/post indexed variants of MI.
599static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
600 int &MinOffset, int &MaxOffset) {
601 bool IsPaired = isPairedLdSt(MI);
602 bool IsTagStore = isTagStore(MI);
603 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
604 // as in the "unsigned offset" variant.
605 // All other pre/post indexed ldst instructions are unscaled.
606 Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
607
608 if (IsPaired) {
609 MinOffset = -64;
610 MaxOffset = 63;
611 } else {
612 MinOffset = -256;
613 MaxOffset = 255;
614 }
615}
616
Florian Hahn17554b82019-12-11 09:59:18 +0000617static MachineOperand &getLdStRegOp(MachineInstr &MI,
618 unsigned PairedRegOp = 0) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000619 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
620 unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000621 return MI.getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000622}
623
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000624static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000625 unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000626 return MI.getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000627}
628
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000629static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000630 unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000631 return MI.getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000632}
633
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000634static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
635 MachineInstr &StoreInst,
Chad Rosiere4e15ba2016-03-09 17:29:48 +0000636 const AArch64InstrInfo *TII) {
Jun Bum Lim6755c3b2015-12-22 16:36:16 +0000637 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
638 int LoadSize = getMemScale(LoadInst);
639 int StoreSize = getMemScale(StoreInst);
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000640 int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
Jun Bum Lim6755c3b2015-12-22 16:36:16 +0000641 ? getLdStOffsetOp(StoreInst).getImm()
642 : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000643 int UnscaledLdOffset = TII->isUnscaledLdSt(LoadInst)
Jun Bum Lim6755c3b2015-12-22 16:36:16 +0000644 ? getLdStOffsetOp(LoadInst).getImm()
645 : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
646 return (UnscaledStOffset <= UnscaledLdOffset) &&
647 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
648}
649
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +0000650static bool isPromotableZeroStoreInst(MachineInstr &MI) {
Chad Rosierd6daac42016-11-07 15:27:22 +0000651 unsigned Opc = MI.getOpcode();
652 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
653 isNarrowStore(Opc)) &&
Jun Bum Lim397eb7b2016-02-12 15:25:39 +0000654 getLdStRegOp(MI).getReg() == AArch64::WZR;
655}
656
Evandro Menezes5ba804b2017-11-15 21:06:22 +0000657static bool isPromotableLoadFromStore(MachineInstr &MI) {
658 switch (MI.getOpcode()) {
659 default:
660 return false;
661 // Scaled instructions.
662 case AArch64::LDRBBui:
663 case AArch64::LDRHHui:
664 case AArch64::LDRWui:
665 case AArch64::LDRXui:
666 // Unscaled instructions.
667 case AArch64::LDURBBi:
668 case AArch64::LDURHHi:
669 case AArch64::LDURWi:
670 case AArch64::LDURXi:
671 return true;
672 }
673}
674
// Return true if MI is a load/store whose base-register update could be
// folded into a pre-/post-indexed form. The opcode must be one of the
// supported scaled, unscaled, or paired loads/stores, and the address
// operand must be a reg+imm (an immediate offset, not a relocation).
static bool isMergeableLdStUpdate(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;
  // Scaled instructions.
  case AArch64::STRSui:
  case AArch64::STRDui:
  case AArch64::STRQui:
  case AArch64::STRXui:
  case AArch64::STRWui:
  case AArch64::STRHHui:
  case AArch64::STRBBui:
  case AArch64::LDRSui:
  case AArch64::LDRDui:
  case AArch64::LDRQui:
  case AArch64::LDRXui:
  case AArch64::LDRWui:
  case AArch64::LDRHHui:
  case AArch64::LDRBBui:
  // Tag-setting stores (presumably the MTE memory-tagging stores — the
  // opcode names suggest so; confirm against AArch64InstrInfo.td).
  case AArch64::STGOffset:
  case AArch64::STZGOffset:
  case AArch64::ST2GOffset:
  case AArch64::STZ2GOffset:
  case AArch64::STGPi:
  // Unscaled instructions.
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  // Paired instructions.
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    // Make sure this is a reg+imm (as opposed to an address reloc).
    if (!getLdStOffsetOp(MI).isImm())
      return false;

    return true;
  }
}
730
// Merge the two promotable zero stores I and MergeMI into a single wider
// zero store (e.g. two adjacent "str wzr" into one "str xzr"), inserted at
// the position MergeForward selects. Returns an iterator to the next
// instruction to consider; both original stores are erased.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                           MachineBasicBlock::iterator MergeMI,
                                           const LdStPairFlags &Flags) {
  assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
         "Expected promotable zero stores.");

  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == MergeMI)
    ++NextI;

  unsigned Opc = I->getOpcode();
  // For scaled opcodes the immediates count elements, so the two stores are
  // adjacent when their immediates differ by 1; for unscaled opcodes the
  // immediates are bytes and must differ by the access size.
  bool IsScaled = !TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsScaled ? 1 : getMemScale(*I);

  bool MergeForward = Flags.getMergeForward();
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  // RtMI is the store at the lower address; its immediate seeds the wider
  // store's offset.
  MachineInstr *RtMI;
  if (getLdStOffsetOp(*I).getImm() ==
      getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
    RtMI = &*MergeMI;
  else
    RtMI = &*I;

  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Change the scaled offset from small to large type.
  // The wider store's element size is twice the narrow one's, so a scaled
  // immediate must be even and is halved.
  if (IsScaled) {
    assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
    OffsetImm /= 2;
  }

  // Construct the new instruction. The stored value is the zero register of
  // the doubled width: WZR when widening narrow stores, XZR when widening a
  // 32-bit store.
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
            .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*MergeMI})
            .setMIFlags(I->mergeFlagsWith(*MergeMI));
  (void)MIB;

  LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(MergeMI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  I->eraseFromParent();
  MergeMI->eraseFromParent();
  return NextI;
}
800
// Apply Fn to all instructions between MI and the beginning of the block, until
// a def for DefReg is reached. Returns true, iff Fn returns true for all
// visited instructions. Stop after visiting Limit iterations.
//
// \param MI     starting instruction; the walk goes backwards from here.
// \param DefReg physical register whose (overlapping) definition ends the walk.
// \param Fn     called as Fn(Instr, IsDef); IsDef is true when Instr defines
//               a register overlapping DefReg. Returning false aborts the walk.
static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
                              const TargetRegisterInfo *TRI, unsigned Limit,
                              std::function<bool(MachineInstr &, bool)> &Fn) {
  auto MBB = MI.getParent();
  for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
                                           E = MBB->rend();
       I != E; I++) {
    // Give up (conservatively report failure) once the scan budget is spent.
    if (!Limit)
      return false;
    --Limit;

    // Does this instruction define a register overlapping DefReg? Debug
    // operands and the zero register (Reg == 0) are ignored.
    bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
      return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
             TRI->regsOverlap(MOP.getReg(), DefReg);
    });
    // The defining instruction itself is still passed to Fn before stopping.
    if (!Fn(*I, isDef))
      return false;
    if (isDef)
      break;
  }
  return true;
}
826
827static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
828 const TargetRegisterInfo *TRI) {
829
830 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
831 if (MOP.isReg() && MOP.isKill())
832 Units.removeReg(MOP.getReg());
833
834 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
835 if (MOP.isReg() && !MOP.isKill())
836 Units.addReg(MOP.getReg());
837}
838
// Merge the two single loads/stores I and Paired into one paired
// instruction (LDP/STP and friends), optionally renaming the register of I
// (when merging forward with a rename register available) so the pair is
// legal. Handles mixed scaled/unscaled immediates, swapped operand order,
// kill-flag cleanup, and the sign-extension fixup needed when pairing an
// LDRSW-style load. Returns the iterator to continue scanning from; both
// original instructions are erased.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  // SExtIdx != -1 means one of the two instructions is a sign-extending
  // load; work with its non-sign-extending twin opcode and patch up the
  // extension after building the pair.
  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = TII->isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;

  bool MergeForward = Flags.getMergeForward();

  Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
  if (MergeForward && RenameReg) {
    MCRegister RegToRename = getLdStRegOp(*I).getReg();
    DefinedInBB.addReg(*RenameReg);

    // Return the sub/super register for RenameReg, matching the size of
    // OriginalReg.
    auto GetMatchingSubReg = [this,
                              RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
      for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
        if (TRI->getMinimalPhysRegClass(OriginalReg) ==
            TRI->getMinimalPhysRegClass(SubOrSuper))
          return SubOrSuper;
      llvm_unreachable("Should have found matching sub or super register!");
    };

    // Walk backwards from I (via forAllMIsUntilDef) rewriting every operand
    // that overlaps RegToRename to the corresponding rename register, up to
    // and including the instruction that defines it.
    std::function<bool(MachineInstr &, bool)> UpdateMIs =
        [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
          if (IsDef) {
            bool SeenDef = false;
            for (auto &MOP : MI.operands()) {
              // Rename the first explicit definition and all implicit
              // definitions matching RegToRename.
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                assert((MOP.isImplicit() ||
                        (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
                       "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
                SeenDef = true;
              }
            }
          } else {
            for (auto &MOP : MI.operands()) {
              if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
                  TRI->regsOverlap(MOP.getReg(), RegToRename)) {
                // NOTE(review): unlike the assert in the IsDef branch above,
                // this one is not parenthesized around the || — `&&` binds
                // tighter, so the message string attaches only to the second
                // operand. The truth value is unchanged (the literal is
                // truthy), but the parenthesization should match the branch
                // above — confirm/clean up upstream.
                assert(MOP.isImplicit() ||
                       (MOP.isRenamable() && !MOP.isEarlyClobber()) &&
                           "Need renamable operands");
                MOP.setReg(GetMatchingSubReg(MOP.getReg()));
              }
            }
          }
          LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
          return true;
        };
    forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);

    // Make sure the register used for renaming is not used between the paired
    // instructions. That would trash the content before the new paired
    // instruction.
    for (auto &MI :
         iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
             std::next(I), std::next(Paired)))
      assert(all_of(MI.operands(),
                    [this, &RenameReg](const MachineOperand &MOP) {
                      return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
                             !TRI->regsOverlap(MOP.getReg(), *RenameReg);
                    }) &&
             "Rename register used between paired instruction, trashing the "
             "content");
  }

  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);

  int Offset = getLdStOffsetOp(*I).getImm();
  int PairedOffset = getLdStOffsetOp(*Paired).getImm();
  bool PairedIsUnscaled = TII->isUnscaledLdSt(Paired->getOpcode());
  if (IsUnscaled != PairedIsUnscaled) {
    // We're trying to pair instructions that differ in how they are scaled. If
    // I is scaled then scale the offset of Paired accordingly. Otherwise, do
    // the opposite (i.e., make Paired's offset unscaled).
    int MemSize = getMemScale(*Paired);
    if (PairedIsUnscaled) {
      // If the unscaled offset isn't a multiple of the MemSize, we can't
      // pair the operations together.
      assert(!(PairedOffset % getMemScale(*Paired)) &&
             "Offset should be a multiple of the stride!");
      PairedOffset /= MemSize;
    } else {
      PairedOffset *= MemSize;
    }
  }

  // Which register is Rt and which is Rt2 depends on the offset order.
  // RtMI is the access at the lower address: its offset becomes the pair's.
  MachineInstr *RtMI, *Rt2MI;
  if (Offset == PairedOffset + OffsetStride) {
    RtMI = &*Paired;
    Rt2MI = &*I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = &*I;
    Rt2MI = &*Paired;
  }
  int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
  // Scale the immediate offset, if necessary. Paired opcodes always encode
  // scaled (element) immediates, so an unscaled byte offset is divided down.
  if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
    assert(!(OffsetImm % getMemScale(*RtMI)) &&
           "Unscaled offset cannot be scaled.");
    OffsetImm /= getMemScale(*RtMI);
  }

  // Construct the new instruction.
  MachineInstrBuilder MIB;
  DebugLoc DL = I->getDebugLoc();
  MachineBasicBlock *MBB = I->getParent();
  MachineOperand RegOp0 = getLdStRegOp(*RtMI);
  MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
  // Kill flags may become invalid when moving stores for pairing.
  if (RegOp0.isUse()) {
    if (!MergeForward) {
      // Clear kill flags on store if moving upwards. Example:
      //   STRWui %w0, ...
      //   USE %w1
      //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
      RegOp0.setIsKill(false);
      RegOp1.setIsKill(false);
    } else {
      // Clear kill flags of the first stores register. Example:
      //   STRWui %w1, ...
      //   USE kill %w1   ; need to clear kill flag when moving STRWui downwards
      //   STRW %w0
      Register Reg = getLdStRegOp(*I).getReg();
      for (MachineInstr &MI : make_range(std::next(I), Paired))
        MI.clearRegisterKills(Reg, TRI);
    }
  }
  MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
            .add(RegOp0)
            .add(RegOp1)
            .add(BaseRegOp)
            .addImm(OffsetImm)
            .cloneMergedMemRefs({&*I, &*Paired})
            .setMIFlags(I->mergeFlagsWith(*Paired));

  (void)MIB;

  LLVM_DEBUG(
      dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Paired->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %w1 = KILL %w1, implicit-def %x1
    // %x1 = SBFMXri killed %x1, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    Register DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    LLVM_DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    LLVM_DEBUG(dbgs() << "  Extend operand:\n    ");
    LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
  } else {
    LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  }
  LLVM_DEBUG(dbgs() << "\n");

  // When merging forward, registers killed by I become live again at the
  // pair's new (later) location; record them as defined in this block.
  if (MergeForward)
    for (const MachineOperand &MOP : phys_regs_and_masks(*I))
      if (MOP.isReg() && MOP.isKill())
        DefinedInBB.addReg(MOP.getReg());

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
1063
// Replace the load LoadI, which reads bytes an earlier store StoreI wrote,
// with a register-to-register operation on the stored value:
//   - same-size 32/64-bit: delete the load (same register) or emit an ORR
//     (register move);
//   - smaller load contained in the store: emit AND (offset 0) or UBFM to
//     extract the loaded bytes (little-endian only).
// Returns the iterator following the original load; the load is erased.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                          MachineBasicBlock::iterator StoreI) {
  MachineBasicBlock::iterator NextI = LoadI;
  ++NextI;

  int LoadSize = getMemScale(*LoadI);
  int StoreSize = getMemScale(*StoreI);
  Register LdRt = getLdStRegOp(*LoadI).getReg();
  const MachineOperand &StMO = getLdStRegOp(*StoreI);
  Register StRt = getLdStRegOp(*StoreI).getReg();
  bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);

  assert((IsStoreXReg ||
          TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
         "Unexpected RegClass");

  MachineInstr *BitExtMI;
  if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
    // Remove the load, if the destination register of the loads is the same
    // register for stored value.
    if (StRt == LdRt && LoadSize == 8) {
      // The stored register now stays live past its old kill point; clear
      // the (single) kill flag between the store and the load.
      for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                         LoadI->getIterator())) {
        if (MI.killsRegister(StRt, TRI)) {
          MI.clearRegisterKills(StRt, TRI);
          break;
        }
      }
      LLVM_DEBUG(dbgs() << "Remove load instruction:\n    ");
      LLVM_DEBUG(LoadI->print(dbgs()));
      LLVM_DEBUG(dbgs() << "\n");
      LoadI->eraseFromParent();
      return NextI;
    }
    // Replace the load with a mov if the load and store are in the same size.
    // ORR Rd, ZR, Rm is the canonical register move.
    BitExtMI =
        BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
            .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
            .setMIFlags(LoadI->getFlags());
  } else {
    // FIXME: Currently we disable this transformation in big-endian targets as
    // performance and correctness are verified only in little-endian.
    if (!Subtarget->isLittleEndian())
      return NextI;
    bool IsUnscaled = TII->isUnscaledLdSt(*LoadI);
    assert(IsUnscaled == TII->isUnscaledLdSt(*StoreI) &&
           "Unsupported ld/st match");
    assert(LoadSize <= StoreSize && "Invalid load size");
    // Normalize both immediates to byte offsets so the bit range of the
    // loaded bytes inside the stored value can be computed.
    int UnscaledLdOffset = IsUnscaled
                               ? getLdStOffsetOp(*LoadI).getImm()
                               : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
    int UnscaledStOffset = IsUnscaled
                               ? getLdStOffsetOp(*StoreI).getImm()
                               : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
    int Width = LoadSize * 8;
    // The bitfield ops below are emitted in the store's width; widen the
    // load's W destination to its X super-register when the store is 64-bit.
    unsigned DestReg =
        IsStoreXReg ? Register(TRI->getMatchingSuperReg(
                          LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
                    : LdRt;

    assert((UnscaledLdOffset >= UnscaledStOffset &&
            (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
           "Invalid offset");

    // Immr/Imms select bits [Immr, Immr+Width) of the stored register
    // (little-endian byte order makes this a simple shift count).
    int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
    int Imms = Immr + Width - 1;
    if (UnscaledLdOffset == UnscaledStOffset) {
      // Load starts at the store's base: a plain mask (AND) suffices.
      uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
                                | ((Immr) << 6)               // immr
                                | ((Imms) << 0)               // imms
          ;

      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                  DestReg)
              .add(StMO)
              .addImm(AndMaskEncoded)
              .setMIFlags(LoadI->getFlags());
    } else {
      // Interior bytes: unsigned bitfield extract (UBFM) shifts and masks.
      BitExtMI =
          BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                  TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                  DestReg)
              .add(StMO)
              .addImm(Immr)
              .addImm(Imms)
              .setMIFlags(LoadI->getFlags());
    }
  }

  // Clear kill flags between store and load.
  for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                     BitExtMI->getIterator()))
    if (MI.killsRegister(StRt, TRI)) {
      MI.clearRegisterKills(StRt, TRI);
      break;
    }

  LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(LoadI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instructions:\n    ");
  LLVM_DEBUG(StoreI->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG((BitExtMI)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions.
  LoadI->eraseFromParent();
  return NextI;
}
1181
// Return true if Offset fits the signed 7-bit scaled immediate field of a
// paired load/store, i.e. lies in [-64, 63] element units. Unscaled
// candidates carry byte offsets and are first converted to element units;
// a byte offset that is not stride-aligned can never be encoded.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  if (IsUnscaled) {
    if (Offset % OffsetStride != 0)
      return false;
    Offset /= OffsetStride;
  }
  return -64 <= Offset && Offset <= 63;
}
1194
// Round Num up to the nearest multiple of PowOf2 (which must be a power of
// two). Specialized to signed int to avoid a C-style uint64_t -> int cast
// when using alignTo from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
1202
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001203static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
Chad Rosiera69dcb62017-03-17 14:19:55 +00001204 AliasAnalysis *AA) {
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001205 // One of the instructions must modify memory.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001206 if (!MIa.mayStore() && !MIb.mayStore())
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001207 return false;
1208
1209 // Both instructions must be memory operations.
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001210 if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001211 return false;
1212
Chad Rosiera69dcb62017-03-17 14:19:55 +00001213 return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001214}
1215
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001216static bool mayAlias(MachineInstr &MIa,
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001217 SmallVectorImpl<MachineInstr *> &MemInsns,
Chad Rosiera69dcb62017-03-17 14:19:55 +00001218 AliasAnalysis *AA) {
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +00001219 for (MachineInstr *MIb : MemInsns)
Chad Rosiera69dcb62017-03-17 14:19:55 +00001220 if (mayAlias(MIa, *MIb, AA))
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001221 return true;
1222
1223 return false;
1224}
1225
// Scan backwards from the load at I (up to Limit non-transient
// instructions, within the same basic block) for a store that wrote the
// bytes the load reads from the same base register. On success, set StoreI
// to that store and return true. The scan aborts on calls, on any
// modification of the base register or of the stored value's register, and
// on any intervening store that may alias the load.
bool AArch64LoadStoreOpt::findMatchingStore(
    MachineBasicBlock::iterator I, unsigned Limit,
    MachineBasicBlock::iterator &StoreI) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr &LoadMI = *I;
  Register BaseReg = getLdStBaseOp(LoadMI).getReg();

  // If the load is the first instruction in the block, there's obviously
  // not any matching store.
  if (MBBI == B)
    return false;

  // Track which register units have been modified and used between the first
  // insn and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  unsigned Count = 0;
  do {
    --MBBI;
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If the load instruction reads directly from the address to which the
    // store instruction writes and the stored value is not modified, we can
    // promote the load. Since we do not handle stores with pre-/post-index,
    // it's unnecessary to check if BaseReg is modified by the store itself.
    if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
        BaseReg == getLdStBaseOp(MI).getReg() &&
        isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
        ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
      StoreI = MBBI;
      return true;
    }

    // Calls may clobber memory and registers arbitrarily; give up.
    if (MI.isCall())
      return false;

    // Update modified / uses register units.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg))
      return false;

    // If we encounter a store aliased with the load, return early.
    if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
      return false;
  } while (MBBI != B && Count < Limit);
  return false;
}
1283
Chad Rosierc5083c22016-06-10 20:47:14 +00001284// Returns true if FirstMI and MI are candidates for merging or pairing.
1285// Otherwise, returns false.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001286static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
Chad Rosierc5083c22016-06-10 20:47:14 +00001287 LdStPairFlags &Flags,
1288 const AArch64InstrInfo *TII) {
1289 // If this is volatile or if pairing is suppressed, not a candidate.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001290 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
Chad Rosierc5083c22016-06-10 20:47:14 +00001291 return false;
1292
1293 // We should have already checked FirstMI for pair suppression and volatility.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001294 assert(!FirstMI.hasOrderedMemoryRef() &&
1295 !TII->isLdStPairSuppressed(FirstMI) &&
Chad Rosierc5083c22016-06-10 20:47:14 +00001296 "FirstMI shouldn't get here if either of these checks are true.");
1297
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001298 unsigned OpcA = FirstMI.getOpcode();
1299 unsigned OpcB = MI.getOpcode();
Chad Rosierc5083c22016-06-10 20:47:14 +00001300
Chad Rosierc3f6cb92016-02-10 19:45:48 +00001301 // Opcodes match: nothing more to check.
1302 if (OpcA == OpcB)
1303 return true;
1304
1305 // Try to match a sign-extended load/store with a zero-extended load/store.
1306 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1307 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1308 assert(IsValidLdStrOpc &&
1309 "Given Opc should be a Load or Store with an immediate");
1310 // OpcA will be the first instruction in the pair.
1311 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1312 Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
1313 return true;
1314 }
Chad Rosier00f9d232016-02-11 14:25:08 +00001315
Chad Rosierd6daac42016-11-07 15:27:22 +00001316 // If the second instruction isn't even a mergable/pairable load/store, bail
1317 // out.
Chad Rosier00f9d232016-02-11 14:25:08 +00001318 if (!PairIsValidLdStrOpc)
1319 return false;
1320
Chad Rosierd6daac42016-11-07 15:27:22 +00001321 // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
1322 // offsets.
1323 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
Chad Rosier00f9d232016-02-11 14:25:08 +00001324 return false;
1325
1326 // Try to match an unscaled load/store with a scaled load/store.
Chad Rosiere4e15ba2016-03-09 17:29:48 +00001327 return TII->isUnscaledLdSt(OpcA) != TII->isUnscaledLdSt(OpcB) &&
Chad Rosier00f9d232016-02-11 14:25:08 +00001328 getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1329
1330 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
Chad Rosierc3f6cb92016-02-10 19:45:48 +00001331}
1332
Florian Hahn17554b82019-12-11 09:59:18 +00001333static bool
1334canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1335 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1336 const TargetRegisterInfo *TRI) {
1337 if (!FirstMI.mayStore())
1338 return false;
1339
1340 // Check if we can find an unused register which we can use to rename
1341 // the register used by the first load/store.
1342 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1343 MachineFunction &MF = *FirstMI.getParent()->getParent();
1344 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1345 return false;
1346
1347 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1348 // For now, we only rename if the store operand gets killed at the store.
1349 if (!getLdStRegOp(FirstMI).isKill() &&
1350 !any_of(FirstMI.operands(),
1351 [TRI, RegToRename](const MachineOperand &MOP) {
Florian Hahn2675a3c2019-12-11 17:17:29 +00001352 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1353 MOP.isImplicit() && MOP.isKill() &&
Florian Hahn17554b82019-12-11 09:59:18 +00001354 TRI->regsOverlap(RegToRename, MOP.getReg());
1355 })) {
1356 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
1357 return false;
1358 }
1359 auto canRenameMOP = [](const MachineOperand &MOP) {
1360 return MOP.isImplicit() ||
1361 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1362 };
1363
1364 bool FoundDef = false;
1365
1366 // For each instruction between FirstMI and the previous def for RegToRename,
1367 // we
1368 // * check if we can rename RegToRename in this instruction
1369 // * collect the registers used and required register classes for RegToRename.
1370 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1371 bool IsDef) {
1372 LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
1373 // Currently we do not try to rename across frame-setup instructions.
1374 if (MI.getFlag(MachineInstr::FrameSetup)) {
1375 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
1376 << MI << ")\n");
1377 return false;
1378 }
1379
1380 UsedInBetween.accumulate(MI);
1381
1382 // For a definition, check that we can rename the definition and exit the
1383 // loop.
1384 FoundDef = IsDef;
1385
1386 // For defs, check if we can rename the first def of RegToRename.
1387 if (FoundDef) {
1388 for (auto &MOP : MI.operands()) {
Florian Hahn2675a3c2019-12-11 17:17:29 +00001389 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
Florian Hahn17554b82019-12-11 09:59:18 +00001390 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1391 continue;
1392 if (!canRenameMOP(MOP)) {
1393 LLVM_DEBUG(dbgs()
1394 << " Cannot rename " << MOP << " in " << MI << "\n");
1395 return false;
1396 }
1397 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1398 }
1399 return true;
1400 } else {
1401 for (auto &MOP : MI.operands()) {
Florian Hahn2675a3c2019-12-11 17:17:29 +00001402 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1403 !TRI->regsOverlap(MOP.getReg(), RegToRename))
Florian Hahn17554b82019-12-11 09:59:18 +00001404 continue;
1405
1406 if (!canRenameMOP(MOP)) {
1407 LLVM_DEBUG(dbgs()
1408 << " Cannot rename " << MOP << " in " << MI << "\n");
1409 return false;
1410 }
1411 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1412 }
1413 }
1414 return true;
1415 };
1416
1417 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1418 return false;
1419
1420 if (!FoundDef) {
1421 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1422 return false;
1423 }
1424 return true;
1425}
1426
1427// Check if we can find a physical register for renaming. This register must:
1428// * not be defined up to FirstMI (checking DefinedInBB)
1429// * not used between the MI and the defining instruction of the register to
1430// rename (checked using UsedInBetween).
1431// * is available in all used register classes (checked using RequiredClasses).
1432static Optional<MCPhysReg> tryToFindRegisterToRename(
1433 MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
1434 LiveRegUnits &UsedInBetween,
1435 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1436 const TargetRegisterInfo *TRI) {
1437 auto &MF = *FirstMI.getParent()->getParent();
1438
1439 // Checks if any sub- or super-register of PR is callee saved.
1440 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1441 return any_of(TRI->sub_and_superregs_inclusive(PR),
1442 [&MF, TRI](MCPhysReg SubOrSuper) {
1443 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1444 });
1445 };
1446
1447 // Check if PR or one of its sub- or super-registers can be used for all
1448 // required register classes.
1449 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1450 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1451 return any_of(TRI->sub_and_superregs_inclusive(PR),
1452 [C, TRI](MCPhysReg SubOrSuper) {
1453 return C == TRI->getMinimalPhysRegClass(SubOrSuper);
1454 });
1455 });
1456 };
1457
1458 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1459 for (const MCPhysReg &PR : *RegClass) {
1460 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1461 !AnySubOrSuperRegCalleePreserved(PR) && CanBeUsedForAllClasses(PR)) {
1462 DefinedInBB.addReg(PR);
1463 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1464 << "\n");
1465 return {PR};
1466 }
1467 }
1468 LLVM_DEBUG(dbgs() << "No rename register found from "
1469 << TRI->getRegClassName(RegClass) << "\n");
1470 return None;
1471}
1472
Chad Rosier9f4ec2e2016-02-10 18:49:28 +00001473/// Scan the instructions looking for a load/store that can be combined with the
1474/// current instruction into a wider equivalent or a load/store pair.
Tim Northover3b0846e2014-05-24 12:50:23 +00001475MachineBasicBlock::iterator
1476AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Jun Bum Limcf974432016-03-31 14:47:24 +00001477 LdStPairFlags &Flags, unsigned Limit,
1478 bool FindNarrowMerge) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001479 MachineBasicBlock::iterator E = I->getParent()->end();
1480 MachineBasicBlock::iterator MBBI = I;
Florian Hahn17554b82019-12-11 09:59:18 +00001481 MachineBasicBlock::iterator MBBIWithRenameReg;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001482 MachineInstr &FirstMI = *I;
Tim Northover3b0846e2014-05-24 12:50:23 +00001483 ++MBBI;
1484
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001485 bool MayLoad = FirstMI.mayLoad();
1486 bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
Daniel Sanders5ae66e52019-08-12 22:40:53 +00001487 Register Reg = getLdStRegOp(FirstMI).getReg();
1488 Register BaseReg = getLdStBaseOp(FirstMI).getReg();
Chad Rosierf77e9092015-08-06 15:50:12 +00001489 int Offset = getLdStOffsetOp(FirstMI).getImm();
Chad Rosierf11d0402015-10-01 18:17:12 +00001490 int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
Jun Bum Lim397eb7b2016-02-12 15:25:39 +00001491 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001492
Florian Hahn17554b82019-12-11 09:59:18 +00001493 Optional<bool> MaybeCanRename = None;
1494 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
1495 LiveRegUnits UsedInBetween;
1496 UsedInBetween.init(*TRI);
1497
1498 Flags.clearRenameReg();
1499
Jun Bum Lim47aece12018-04-27 18:44:37 +00001500 // Track which register units have been modified and used between the first
1501 // insn (inclusive) and the second insn.
1502 ModifiedRegUnits.clear();
1503 UsedRegUnits.clear();
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001504
1505 // Remember any instructions that read/write memory between FirstMI and MI.
1506 SmallVector<MachineInstr *, 4> MemInsns;
1507
Tim Northover3b0846e2014-05-24 12:50:23 +00001508 for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001509 MachineInstr &MI = *MBBI;
Tim Northover3b0846e2014-05-24 12:50:23 +00001510
Florian Hahn17554b82019-12-11 09:59:18 +00001511 UsedInBetween.accumulate(MI);
1512
Geoff Berry4ff2e362016-07-21 15:20:25 +00001513 // Don't count transient instructions towards the search limit since there
1514 // may be different numbers of them if e.g. debug information is present.
1515 if (!MI.isTransient())
1516 ++Count;
Tim Northover3b0846e2014-05-24 12:50:23 +00001517
Chad Rosier18896c02016-02-04 16:01:40 +00001518 Flags.setSExtIdx(-1);
Chad Rosierc5083c22016-06-10 20:47:14 +00001519 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
Chad Rosierc3f6cb92016-02-10 19:45:48 +00001520 getLdStOffsetOp(MI).isImm()) {
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001521 assert(MI.mayLoadOrStore() && "Expected memory operation.");
Tim Northover3b0846e2014-05-24 12:50:23 +00001522 // If we've found another instruction with the same opcode, check to see
1523 // if the base and offset are compatible with our starting instruction.
1524 // These instructions all have scaled immediate operands, so we just
1525 // check for +1/-1. Make sure to check the new instruction offset is
1526 // actually an immediate and not a symbolic reference destined for
1527 // a relocation.
Daniel Sanders5ae66e52019-08-12 22:40:53 +00001528 Register MIBaseReg = getLdStBaseOp(MI).getReg();
Chad Rosierf77e9092015-08-06 15:50:12 +00001529 int MIOffset = getLdStOffsetOp(MI).getImm();
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001530 bool MIIsUnscaled = TII->isUnscaledLdSt(MI);
Chad Rosier00f9d232016-02-11 14:25:08 +00001531 if (IsUnscaled != MIIsUnscaled) {
1532 // We're trying to pair instructions that differ in how they are scaled.
1533 // If FirstMI is scaled then scale the offset of MI accordingly.
1534 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1535 int MemSize = getMemScale(MI);
1536 if (MIIsUnscaled) {
1537 // If the unscaled offset isn't a multiple of the MemSize, we can't
1538 // pair the operations together: bail and keep looking.
Eli Friedmanf184e4b2016-08-12 20:39:51 +00001539 if (MIOffset % MemSize) {
Jun Bum Lim47aece12018-04-27 18:44:37 +00001540 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1541 UsedRegUnits, TRI);
Eli Friedmanf184e4b2016-08-12 20:39:51 +00001542 MemInsns.push_back(&MI);
Chad Rosier00f9d232016-02-11 14:25:08 +00001543 continue;
Eli Friedmanf184e4b2016-08-12 20:39:51 +00001544 }
Chad Rosier00f9d232016-02-11 14:25:08 +00001545 MIOffset /= MemSize;
1546 } else {
1547 MIOffset *= MemSize;
1548 }
1549 }
1550
Tim Northover3b0846e2014-05-24 12:50:23 +00001551 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
1552 (Offset + OffsetStride == MIOffset))) {
1553 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
Jun Bum Limcf974432016-03-31 14:47:24 +00001554 if (FindNarrowMerge) {
Jun Bum Lim80ec0d32015-11-20 21:14:07 +00001555 // If the alignment requirements of the scaled wide load/store
Jun Bum Limcf974432016-03-31 14:47:24 +00001556 // instruction can't express the offset of the scaled narrow input,
1557 // bail and keep looking. For promotable zero stores, allow only when
1558 // the stored value is the same (i.e., WZR).
1559 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1560 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
Jun Bum Lim47aece12018-04-27 18:44:37 +00001561 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1562 UsedRegUnits, TRI);
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001563 MemInsns.push_back(&MI);
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001564 continue;
1565 }
1566 } else {
Chad Rosierd1f6c842016-06-10 20:49:18 +00001567 // Pairwise instructions have a 7-bit signed offset field. Single
1568 // insns have a 12-bit unsigned offset field. If the resultant
1569 // immediate offset of merging these instructions is out of range for
1570 // a pairwise instruction, bail and keep looking.
Jun Bum Limcf974432016-03-31 14:47:24 +00001571 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
Jun Bum Lim47aece12018-04-27 18:44:37 +00001572 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1573 UsedRegUnits, TRI);
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001574 MemInsns.push_back(&MI);
Jun Bum Limcf974432016-03-31 14:47:24 +00001575 continue;
1576 }
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001577 // If the alignment requirements of the paired (scaled) instruction
1578 // can't express the offset of the unscaled input, bail and keep
1579 // looking.
1580 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
Jun Bum Lim47aece12018-04-27 18:44:37 +00001581 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1582 UsedRegUnits, TRI);
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001583 MemInsns.push_back(&MI);
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001584 continue;
1585 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001586 }
1587 // If the destination register of the loads is the same register, bail
1588 // and keep looking. A load-pair instruction with both destination
1589 // registers the same is UNPREDICTABLE and will result in an exception.
Jun Bum Limcf974432016-03-31 14:47:24 +00001590 if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
Jun Bum Lim47aece12018-04-27 18:44:37 +00001591 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1592 TRI);
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001593 MemInsns.push_back(&MI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001594 continue;
1595 }
1596
1597 // If the Rt of the second instruction was not modified or used between
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001598 // the two instructions and none of the instructions between the second
1599 // and first alias with the second, we can combine the second into the
1600 // first.
Jun Bum Lim47aece12018-04-27 18:44:37 +00001601 if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1602 !(MI.mayLoad() &&
1603 !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
Chad Rosiera69dcb62017-03-17 14:19:55 +00001604 !mayAlias(MI, MemInsns, AA)) {
Florian Hahn17554b82019-12-11 09:59:18 +00001605
Chad Rosier96a18a92015-07-21 17:42:04 +00001606 Flags.setMergeForward(false);
Florian Hahn17554b82019-12-11 09:59:18 +00001607 Flags.clearRenameReg();
Tim Northover3b0846e2014-05-24 12:50:23 +00001608 return MBBI;
1609 }
1610
1611 // Likewise, if the Rt of the first instruction is not modified or used
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001612 // between the two instructions and none of the instructions between the
1613 // first and the second alias with the first, we can combine the first
1614 // into the second.
Florian Hahn17554b82019-12-11 09:59:18 +00001615 if (!(MayLoad &&
Jun Bum Lim47aece12018-04-27 18:44:37 +00001616 !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
Chad Rosiera69dcb62017-03-17 14:19:55 +00001617 !mayAlias(FirstMI, MemInsns, AA)) {
Florian Hahn17554b82019-12-11 09:59:18 +00001618
1619 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1620 Flags.setMergeForward(true);
1621 Flags.clearRenameReg();
1622 return MBBI;
1623 }
1624
1625 if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1626 if (!MaybeCanRename)
1627 MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1628 RequiredClasses, TRI)};
1629
1630 if (*MaybeCanRename) {
1631 Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
1632 FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
1633 TRI);
1634 if (MaybeRenameReg) {
1635 Flags.setRenameReg(*MaybeRenameReg);
1636 Flags.setMergeForward(true);
1637 MBBIWithRenameReg = MBBI;
1638 }
1639 }
1640 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001641 }
1642 // Unable to combine these instructions due to interference in between.
1643 // Keep looking.
1644 }
1645 }
1646
Florian Hahn17554b82019-12-11 09:59:18 +00001647 if (Flags.getRenameReg())
1648 return MBBIWithRenameReg;
1649
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001650 // If the instruction wasn't a matching load or store. Stop searching if we
1651 // encounter a call instruction that might modify memory.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001652 if (MI.isCall())
Tim Northover3b0846e2014-05-24 12:50:23 +00001653 return E;
1654
Jun Bum Lim47aece12018-04-27 18:44:37 +00001655 // Update modified / uses register units.
1656 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001657
1658 // Otherwise, if the base register is modified, we have no match, so
1659 // return early.
Jun Bum Lim47aece12018-04-27 18:44:37 +00001660 if (!ModifiedRegUnits.available(BaseReg))
Tim Northover3b0846e2014-05-24 12:50:23 +00001661 return E;
Chad Rosierce8e5ab2015-05-21 21:36:46 +00001662
1663 // Update list of instructions that read/write memory.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001664 if (MI.mayLoadOrStore())
1665 MemInsns.push_back(&MI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001666 }
1667 return E;
1668}
1669
// Fold the base-register update instruction *Update (an ADDXri or SUBXri of a
// plain immediate) into the memory instruction *I, forming a pre-indexed
// (IsPreIdx == true) or post-indexed writeback load/store. Both original
// instructions are erased. Returns an iterator to the instruction following
// the merged load/store.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  // Normalize the update to an addition: a SUBXri contributes a negative
  // addend.
  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  // The writeback form encodes a scaled immediate, hence Value / Scale in
  // the addImm below; divisibility was established when the update insn was
  // matched (see isMatchingUpdateInsn).
  int Scale, MinOffset, MaxOffset;
  getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
  if (!isPairedLdSt(*I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  } else {
    // Paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .add(getLdStRegOp(*Update))
              .add(getLdStRegOp(*I, 0))
              .add(getLdStRegOp(*I, 1))
              .add(getLdStBaseOp(*I))
              .addImm(Value / Scale)
              .setMemRefs(I->memoperands())
              .setMIFlags(I->mergeFlagsWith(*Update));
  }
  // MIB is only read inside LLVM_DEBUG below; the cast-to-void silences an
  // unused-variable warning in release builds.
  (void)MIB;

  // Update statistics and trace the rewrite in debug builds.
  if (IsPreIdx) {
    ++NumPreFolded;
    LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
  } else {
    ++NumPostFolded;
    LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
  }
  LLVM_DEBUG(dbgs() << "    Replacing instructions:\n    ");
  LLVM_DEBUG(I->print(dbgs()));
  LLVM_DEBUG(dbgs() << "    ");
  LLVM_DEBUG(Update->print(dbgs()));
  LLVM_DEBUG(dbgs() << "  with instruction:\n    ");
  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  LLVM_DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
1738
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001739bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1740 MachineInstr &MI,
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001741 unsigned BaseReg, int Offset) {
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001742 switch (MI.getOpcode()) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001743 default:
1744 break;
1745 case AArch64::SUBXri:
Tim Northover3b0846e2014-05-24 12:50:23 +00001746 case AArch64::ADDXri:
1747 // Make sure it's a vanilla immediate operand, not a relocation or
1748 // anything else we can't handle.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001749 if (!MI.getOperand(2).isImm())
Tim Northover3b0846e2014-05-24 12:50:23 +00001750 break;
1751 // Watch out for 1 << 12 shifted value.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001752 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
Tim Northover3b0846e2014-05-24 12:50:23 +00001753 break;
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001754
1755 // The update instruction source and destination register must be the
1756 // same as the load/store base register.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001757 if (MI.getOperand(0).getReg() != BaseReg ||
1758 MI.getOperand(1).getReg() != BaseReg)
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001759 break;
1760
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001761 int UpdateOffset = MI.getOperand(2).getImm();
Eli Friedman8585e9d2016-08-12 20:28:02 +00001762 if (MI.getOpcode() == AArch64::SUBXri)
1763 UpdateOffset = -UpdateOffset;
1764
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +00001765 // The immediate must be a multiple of the scaling factor of the pre/post
1766 // indexed instruction.
1767 int Scale, MinOffset, MaxOffset;
1768 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1769 if (UpdateOffset % Scale != 0)
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001770 break;
1771
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +00001772 // Scaled offset must fit in the instruction immediate.
1773 int ScaledOffset = UpdateOffset / Scale;
1774 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1775 break;
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001776
1777 // If we have a non-zero Offset, we check that it matches the amount
1778 // we're adding to the register.
Eli Friedman8585e9d2016-08-12 20:28:02 +00001779 if (!Offset || Offset == UpdateOffset)
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001780 return true;
Tim Northover3b0846e2014-05-24 12:50:23 +00001781 break;
1782 }
1783 return false;
1784}
1785
// Scan forward from the load/store at I for an ADDXri/SUBXri of the base
// register whose addend equals UnscaledOffset, i.e. an update foldable into I
// as a post-index writeback. The scan stops after Limit non-transient
// instructions, or as soon as the base register is used or modified. Returns
// the update instruction, or the block's end() if there is none.
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr &MemMI = *I;
  MachineBasicBlock::iterator MBBI = I;

  Register BaseReg = getLdStBaseOp(MemMI).getReg();
  // The memory operand's offset in bytes (the stored immediate is scaled by
  // the access size).
  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a source/destination register, we can't
  // merge the update. This does not apply to tag store instructions which
  // ignore the address part of the source register.
  // This does not apply to STGPi as well, which does not have unpredictable
  // behavior in this case unlike normal stores, and always performs writeback
  // after reading the source register value.
  if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
    bool IsPairedInsn = isPairedLdSt(MemMI);
    for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
      Register DestReg = getLdStRegOp(MemMI, i).getReg();
      if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
        return E;
    }
  }

  // Track which register units have been modified and used between the first
  // insn (inclusive) and the second insn.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  ++MBBI;
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr &MI = *MBBI;

    // Don't count transient instructions towards the search limit since there
    // may be different numbers of them if e.g. debug information is present.
    if (!MI.isTransient())
      ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

    // Otherwise, if the base register is used or modified, we have no match, so
    // return early.
    if (!ModifiedRegUnits.available(BaseReg) ||
        !UsedRegUnits.available(BaseReg))
      return E;
  }
  return E;
}
1844
1845MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
Chad Rosier35706ad2016-02-04 21:26:02 +00001846 MachineBasicBlock::iterator I, unsigned Limit) {
Tim Northover3b0846e2014-05-24 12:50:23 +00001847 MachineBasicBlock::iterator B = I->getParent()->begin();
1848 MachineBasicBlock::iterator E = I->getParent()->end();
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001849 MachineInstr &MemMI = *I;
Tim Northover3b0846e2014-05-24 12:50:23 +00001850 MachineBasicBlock::iterator MBBI = I;
Tim Northover3b0846e2014-05-24 12:50:23 +00001851
Daniel Sanders5ae66e52019-08-12 22:40:53 +00001852 Register BaseReg = getLdStBaseOp(MemMI).getReg();
Chad Rosierf77e9092015-08-06 15:50:12 +00001853 int Offset = getLdStOffsetOp(MemMI).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +00001854
1855 // If the load/store is the first instruction in the block, there's obviously
1856 // not any matching update. Ditto if the memory offset isn't zero.
1857 if (MBBI == B || Offset != 0)
1858 return E;
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001859 // If the base register overlaps a destination register, we can't
Tim Northover3b0846e2014-05-24 12:50:23 +00001860 // merge the update.
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +00001861 if (!isTagStore(MemMI)) {
1862 bool IsPairedInsn = isPairedLdSt(MemMI);
1863 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1864 Register DestReg = getLdStRegOp(MemMI, i).getReg();
1865 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1866 return E;
1867 }
Chad Rosier1bbd7fb2015-09-25 17:48:17 +00001868 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001869
Jun Bum Lim47aece12018-04-27 18:44:37 +00001870 // Track which register units have been modified and used between the first
1871 // insn (inclusive) and the second insn.
1872 ModifiedRegUnits.clear();
1873 UsedRegUnits.clear();
Geoff Berry173b14d2016-02-09 20:47:21 +00001874 unsigned Count = 0;
1875 do {
1876 --MBBI;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001877 MachineInstr &MI = *MBBI;
Tim Northover3b0846e2014-05-24 12:50:23 +00001878
Geoff Berry4ff2e362016-07-21 15:20:25 +00001879 // Don't count transient instructions towards the search limit since there
1880 // may be different numbers of them if e.g. debug information is present.
1881 if (!MI.isTransient())
Geoff Berry173b14d2016-02-09 20:47:21 +00001882 ++Count;
Chad Rosier35706ad2016-02-04 21:26:02 +00001883
Tim Northover3b0846e2014-05-24 12:50:23 +00001884 // If we found a match, return it.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001885 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
Tim Northover3b0846e2014-05-24 12:50:23 +00001886 return MBBI;
1887
1888 // Update the status of what the instruction clobbered and used.
Jun Bum Lim47aece12018-04-27 18:44:37 +00001889 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
Tim Northover3b0846e2014-05-24 12:50:23 +00001890
1891 // Otherwise, if the base register is used or modified, we have no match, so
1892 // return early.
Jun Bum Lim47aece12018-04-27 18:44:37 +00001893 if (!ModifiedRegUnits.available(BaseReg) ||
1894 !UsedRegUnits.available(BaseReg))
Tim Northover3b0846e2014-05-24 12:50:23 +00001895 return E;
Geoff Berry173b14d2016-02-09 20:47:21 +00001896 } while (MBBI != B && Count < Limit);
Tim Northover3b0846e2014-05-24 12:50:23 +00001897 return E;
1898}
1899
Jun Bum Lim6755c3b2015-12-22 16:36:16 +00001900bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
1901 MachineBasicBlock::iterator &MBBI) {
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001902 MachineInstr &MI = *MBBI;
Jun Bum Lim6755c3b2015-12-22 16:36:16 +00001903 // If this is a volatile load, don't mess with it.
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001904 if (MI.hasOrderedMemoryRef())
Jun Bum Lim6755c3b2015-12-22 16:36:16 +00001905 return false;
1906
1907 // Make sure this is a reg+imm.
1908 // FIXME: It is possible to extend it to handle reg+reg cases.
1909 if (!getLdStOffsetOp(MI).isImm())
1910 return false;
1911
Chad Rosier35706ad2016-02-04 21:26:02 +00001912 // Look backward up to LdStLimit instructions.
Jun Bum Lim6755c3b2015-12-22 16:36:16 +00001913 MachineBasicBlock::iterator StoreI;
Chad Rosier35706ad2016-02-04 21:26:02 +00001914 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
Jun Bum Lim6755c3b2015-12-22 16:36:16 +00001915 ++NumLoadsFromStoresPromoted;
1916 // Promote the load. Keeping the iterator straight is a
1917 // pain, so we let the merge routine tell us what the next instruction
1918 // is after it's done mucking about.
1919 MBBI = promoteLoadFromStore(MBBI, StoreI);
1920 return true;
1921 }
1922 return false;
1923}
1924
Chad Rosierd6daac42016-11-07 15:27:22 +00001925// Merge adjacent zero stores into a wider store.
1926bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
Chad Rosier24c46ad2016-02-09 18:10:20 +00001927 MachineBasicBlock::iterator &MBBI) {
Chad Rosierd6daac42016-11-07 15:27:22 +00001928 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001929 MachineInstr &MI = *MBBI;
1930 MachineBasicBlock::iterator E = MI.getParent()->end();
Chad Rosier24c46ad2016-02-09 18:10:20 +00001931
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001932 if (!TII->isCandidateToMergeOrPair(MI))
Chad Rosier24c46ad2016-02-09 18:10:20 +00001933 return false;
1934
1935 // Look ahead up to LdStLimit instructions for a mergable instruction.
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001936 LdStPairFlags Flags;
Jun Bum Lim397eb7b2016-02-12 15:25:39 +00001937 MachineBasicBlock::iterator MergeMI =
Jun Bum Limcf974432016-03-31 14:47:24 +00001938 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
Chad Rosierd7363db2016-02-09 19:09:22 +00001939 if (MergeMI != E) {
Chad Rosierd6daac42016-11-07 15:27:22 +00001940 ++NumZeroStoresPromoted;
1941
Chad Rosier24c46ad2016-02-09 18:10:20 +00001942 // Keeping the iterator straight is a pain, so we let the merge routine tell
1943 // us what the next instruction is after it's done mucking about.
Chad Rosierd6daac42016-11-07 15:27:22 +00001944 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
Chad Rosier24c46ad2016-02-09 18:10:20 +00001945 return true;
1946 }
1947 return false;
1948}
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001949
Chad Rosier24c46ad2016-02-09 18:10:20 +00001950// Find loads and stores that can be merged into a single load or store pair
1951// instruction.
1952bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001953 MachineInstr &MI = *MBBI;
1954 MachineBasicBlock::iterator E = MI.getParent()->end();
Chad Rosier24c46ad2016-02-09 18:10:20 +00001955
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001956 if (!TII->isCandidateToMergeOrPair(MI))
Chad Rosier24c46ad2016-02-09 18:10:20 +00001957 return false;
1958
Chad Rosierfc3bf1f2016-02-10 15:52:46 +00001959 // Early exit if the offset is not possible to match. (6 bits of positive
1960 // range, plus allow an extra one in case we find a later insn that matches
1961 // with Offset-1)
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001962 bool IsUnscaled = TII->isUnscaledLdSt(MI);
Chad Rosierfc3bf1f2016-02-10 15:52:46 +00001963 int Offset = getLdStOffsetOp(MI).getImm();
1964 int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
Nirav Dave0f9d1112017-01-04 21:21:46 +00001965 // Allow one more for offset.
1966 if (Offset > 0)
1967 Offset -= OffsetStride;
Chad Rosierfc3bf1f2016-02-10 15:52:46 +00001968 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
1969 return false;
1970
Chad Rosier24c46ad2016-02-09 18:10:20 +00001971 // Look ahead up to LdStLimit instructions for a pairable instruction.
1972 LdStPairFlags Flags;
Jun Bum Limcf974432016-03-31 14:47:24 +00001973 MachineBasicBlock::iterator Paired =
1974 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
Chad Rosier24c46ad2016-02-09 18:10:20 +00001975 if (Paired != E) {
1976 ++NumPairCreated;
Duncan P. N. Exon Smithab53fd92016-07-08 20:29:42 +00001977 if (TII->isUnscaledLdSt(MI))
Chad Rosier24c46ad2016-02-09 18:10:20 +00001978 ++NumUnscaledPairCreated;
1979 // Keeping the iterator straight is a pain, so we let the merge routine tell
1980 // us what the next instruction is after it's done mucking about.
Florian Hahn17554b82019-12-11 09:59:18 +00001981 auto Prev = std::prev(MBBI);
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001982 MBBI = mergePairedInsns(MBBI, Paired, Flags);
Florian Hahn17554b82019-12-11 09:59:18 +00001983 // Collect liveness info for instructions between Prev and the new position
1984 // MBBI.
1985 for (auto I = std::next(Prev); I != MBBI; I++)
1986 updateDefinedRegisters(*I, DefinedInBB, TRI);
1987
Jun Bum Limc9879ec2015-10-27 19:16:03 +00001988 return true;
1989 }
1990 return false;
1991}
1992
Evandro Menezes5ba804b2017-11-15 21:06:22 +00001993bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
1994 (MachineBasicBlock::iterator &MBBI) {
1995 MachineInstr &MI = *MBBI;
1996 MachineBasicBlock::iterator E = MI.getParent()->end();
1997 MachineBasicBlock::iterator Update;
1998
1999 // Look forward to try to form a post-index instruction. For example,
2000 // ldr x0, [x20]
2001 // add x20, x20, #32
2002 // merged into:
2003 // ldr x0, [x20], #32
2004 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2005 if (Update != E) {
2006 // Merge the update into the ld/st.
2007 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2008 return true;
2009 }
2010
2011 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2012 if (TII->isUnscaledLdSt(MI.getOpcode()))
2013 return false;
2014
2015 // Look back to try to find a pre-index instruction. For example,
2016 // add x0, x0, #8
2017 // ldr x1, [x0]
2018 // merged into:
2019 // ldr x1, [x0, #8]!
2020 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2021 if (Update != E) {
2022 // Merge the update into the ld/st.
2023 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2024 return true;
2025 }
2026
2027 // The immediate in the load/store is scaled by the size of the memory
2028 // operation. The immediate in the add we're looking for,
2029 // however, is not, so adjust here.
2030 int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
2031
Evgeniy Stepanovc2bda3e2019-09-20 17:36:27 +00002032 // Look forward to try to find a pre-index instruction. For example,
Evandro Menezes5ba804b2017-11-15 21:06:22 +00002033 // ldr x1, [x0, #64]
2034 // add x0, x0, #64
2035 // merged into:
2036 // ldr x1, [x0, #64]!
2037 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2038 if (Update != E) {
2039 // Merge the update into the ld/st.
2040 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2041 return true;
2042 }
2043
2044 return false;
2045}
2046
// Run all four peephole transformations over a single basic block.  Returns
// true if any instruction was changed.  Phase order matters: promotion and
// zero-store merging run before pairing, and writeback folding runs last.
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool EnableNarrowZeroStOpt) {

  bool Modified = false;
  // Four transformations to do here:
  // 1) Find loads that directly read from stores and promote them by
  //    replacing with mov instructions. If the store is wider than the load,
  //    the load will be replaced with a bitfield extract.
  //    e.g.,
  //      str w1, [x0, #4]
  //      ldrh w2, [x0, #6]
  //      ; becomes
  //      str w1, [x0, #4]
  //      lsr w2, w1, #16
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 2) Merge adjacent zero stores into a wider store.
  //    e.g.,
  //      strh wzr, [x0]
  //      strh wzr, [x0, #2]
  //      ; becomes
  //      str wzr, [x0]
  //    e.g.,
  //      str wzr, [x0]
  //      str wzr, [x0, #4]
  //      ; becomes
  //      str xzr, [x0]
  if (EnableNarrowZeroStOpt)
    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
         MBBI != E;) {
      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
        Modified = true;
      else
        ++MBBI;
    }
  // 3) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //    e.g.,
  //      ldr x0, [x2]
  //      ldr x1, [x2, #8]
  //      ; becomes
  //      ldp x0, x1, [x2]

  // Seed the per-block register tracker with the block's live-ins when
  // liveness is available; pairing consults it when searching for a rename
  // register.
  if (MBB.getParent()->getRegInfo().tracksLiveness()) {
    DefinedInBB.clear();
    DefinedInBB.addLiveIns(MBB);
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    // Track currently live registers up to this point, to help with
    // searching for a rename register on demand.
    updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
    if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
      Modified = true;
    else
      ++MBBI;
  }
  // 4) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //    e.g.,
  //      ldr x0, [x2]
  //      add x2, x2, #4
  //      ; becomes
  //      ldr x0, [x2], #4
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
      Modified = true;
    else
      ++MBBI;
  }

  return Modified;
}
2127
2128bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Matthias Braunf1caa282017-12-15 22:22:58 +00002129 if (skipFunction(Fn.getFunction()))
Andrew Kaylor1ac98bb2016-04-25 21:58:52 +00002130 return false;
2131
Oliver Stannardd414c992015-11-10 11:04:18 +00002132 Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
2133 TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2134 TRI = Subtarget->getRegisterInfo();
Chad Rosiera69dcb62017-03-17 14:19:55 +00002135 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
Tim Northover3b0846e2014-05-24 12:50:23 +00002136
Jun Bum Lim47aece12018-04-27 18:44:37 +00002137 // Resize the modified and used register unit trackers. We do this once
2138 // per function and then clear the register units each time we optimize a load
2139 // or store.
2140 ModifiedRegUnits.init(*TRI);
2141 UsedRegUnits.init(*TRI);
Florian Hahn17554b82019-12-11 09:59:18 +00002142 DefinedInBB.init(*TRI);
Chad Rosierbba881e2016-02-02 15:02:30 +00002143
Tim Northover3b0846e2014-05-24 12:50:23 +00002144 bool Modified = false;
Chad Rosier10c7aaa2016-11-11 14:10:12 +00002145 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
Florian Hahn17554b82019-12-11 09:59:18 +00002146 for (auto &MBB : Fn) {
2147 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2148 Modified |= M;
2149 }
Tim Northover3b0846e2014-05-24 12:50:23 +00002150
2151 return Modified;
2152}
2153
Chad Rosier8ade0342016-11-11 19:52:45 +00002154// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2155// stores near one another? Note: The pre-RA instruction scheduler already has
2156// hooks to try and schedule pairable loads/stores together to improve pairing
2157// opportunities. Thus, pre-RA pairing pass may not be worth the effort.
Tim Northover3b0846e2014-05-24 12:50:23 +00002158
Chad Rosier3f8b09d2016-02-09 19:42:19 +00002159// FIXME: When pairing store instructions it's very possible for this pass to
2160// hoist a store with a KILL marker above another use (without a KILL marker).
2161// The resulting IR is invalid, but nothing uses the KILL markers after this
2162// pass, so it's never caused a problem in practice.
2163
/// createAArch64LoadStoreOptimizationPass - returns a new instance of the
/// load / store optimization pass; the caller (pass manager) takes ownership.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}