blob: de3930cd0ce744c0ad3674a39ddaaebfe5cd286b [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AArch64InstrInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +000016#include "AArch64Subtarget.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000017#include "MCTargetDesc/AArch64AddressingModes.h"
18#include "llvm/ADT/BitVector.h"
Chad Rosierce8e5ab2015-05-21 21:36:46 +000019#include "llvm/ADT/SmallVector.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +000020#include "llvm/ADT/Statistic.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000021#include "llvm/CodeGen/MachineBasicBlock.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000025#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/Support/raw_ostream.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +000029#include "llvm/Target/TargetInstrInfo.h"
30#include "llvm/Target/TargetMachine.h"
31#include "llvm/Target/TargetRegisterInfo.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000032using namespace llvm;
33
34#define DEBUG_TYPE "aarch64-ldst-opt"
35
/// AArch64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.
38
// Statistics reported by this pass (visible with -stats).
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");

// Cap on how many instructions to scan; presumably passed as the Limit
// argument of the findMatching* routines — callers not in this view.
static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

// Placeholder while testing unscaled load/store combining; guards the
// unscaled (LDUR/STUR) pairing paths below.
static cl::opt<bool> EnableAArch64UnscaledMemOp(
    "aarch64-unscaled-mem-op", cl::Hidden,
    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));

// Forward declaration so the pass constructor can call the initializer;
// the definition comes from the INITIALIZE_PASS macro below.
namespace llvm {
void initializeAArch64LoadStoreOptPass(PassRegistry &);
}

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
58
Tim Northover3b0846e2014-05-24 12:50:23 +000059namespace {
Chad Rosier96a18a92015-07-21 17:42:04 +000060
// Flags describing a candidate pairing, filled in by findMatchingInsn and
// consumed by mergePairedInsns.
struct LdStPairFlags {
  // True when the merge should remove the first instruction and replace the
  // second with a pair-wise insn; false when the reverse applies.
  bool MergeForward = false;

  // Index of the load-pair result that must be sign extended, counted in the
  // order (I, returned iterator): -1 means no value has to be extended,
  // 0 means I, and 1 means the returned iterator.
  int SExtIdx = -1;

  LdStPairFlags() = default;

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }
};
82
// Post-RA machine-function pass that pairs adjacent loads/stores (ldp/stp)
// and folds base-register updates into pre/post-indexed forms.
struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;
  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  // Target hooks; presumably cached in runOnMachineFunction — body not in
  // this view, TODO confirm.
  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit);

  // Merge the two instructions indicated into a single pair-wise instruction.
  // If MergeForward is true, erase the first instruction and fold its
  // operation into the second. If false, the reverse. Return the instruction
  // following the first instruction (which may change during processing).
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
                                int Value);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Run the above transformations over a single basic block; returns true if
  // anything changed (definition not in this view).
  bool optimizeBlock(MachineBasicBlock &MBB);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return AARCH64_LOAD_STORE_OPT_NAME;
  }
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace

// Register the pass under the name used by the constructor's initializer call.
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)
143
Chad Rosier22eb7102015-08-06 17:37:18 +0000144static bool isUnscaledLdSt(unsigned Opc) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000145 switch (Opc) {
146 default:
147 return false;
148 case AArch64::STURSi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000149 case AArch64::STURDi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000150 case AArch64::STURQi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000151 case AArch64::STURWi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000152 case AArch64::STURXi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000153 case AArch64::LDURSi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000154 case AArch64::LDURDi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000155 case AArch64::LDURQi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000156 case AArch64::LDURWi:
Tim Northover3b0846e2014-05-24 12:50:23 +0000157 case AArch64::LDURXi:
Quentin Colombet29f55332015-01-24 01:25:54 +0000158 case AArch64::LDURSWi:
159 return true;
Tim Northover3b0846e2014-05-24 12:50:23 +0000160 }
161}
162
Chad Rosier22eb7102015-08-06 17:37:18 +0000163static bool isUnscaledLdSt(MachineInstr *MI) {
164 return isUnscaledLdSt(MI->getOpcode());
165}
166
// Scaling factor for unscaled load or store. Returns the access size in
// bytes implied by the opcode; callers below also use it to convert unscaled
// byte offsets into scaled "element" offsets.
static int getMemScale(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
    return 16;
  }
}
216
// Map a sign-extending load opcode (LDRSW*) to its non-extending counterpart
// so sext and non-sext forms can be compared when pairing; opcodes that are
// already non-sext map to themselves. If IsValidLdStrOpc is non-null it is
// set to false (and UINT_MAX returned) for opcodes this pass does not handle.
static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return UINT_MAX;
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}
253
// Map a single load/store opcode (scaled or unscaled form) to the pair-wise
// (LDP/STP) opcode that combines two of them.
static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  }
}
293
// Map an unindexed load/store (or load/store-pair) opcode to its pre-indexed
// writeback equivalent.
static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}
352
// Map an unindexed load/store (or load/store-pair) opcode to its post-indexed
// writeback equivalent.
static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}
411
// True if MI is already a paired (LDP/STP) load/store. Used below to locate
// the register/base/offset operands, which sit one slot later in paired
// instructions.
static bool isPairedLdSt(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    return true;
  }
}
430
431static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
432 unsigned PairedRegOp = 0) {
433 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
434 unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
435 return MI->getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000436}
437
438static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000439 unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
440 return MI->getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000441}
442
443static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000444 unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
445 return MI->getOperand(Idx);
Chad Rosierf77e9092015-08-06 15:50:12 +0000446}
447
// Merge the load/store instructions I and Paired into one pair-wise
// instruction, inserted at whichever of the two positions
// Flags.getMergeForward() selects; both originals are erased. Returns the
// iterator following I (skipping Paired when it is I's immediate successor).
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way we merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  int SExtIdx = Flags.getSExtIdx();
  // For a sext pairing, work with the non-sext opcode; the extension is
  // materialized separately below.
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = isUnscaledLdSt(Opc);
  // Unscaled offsets are in bytes; scaled pair offsets are in elements.
  int OffsetStride =
      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(I) : 1;

  bool MergeForward = Flags.getMergeForward();
  unsigned NewOpc = getMatchingPairOpcode(Opc);
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (getLdStOffsetOp(I).getImm() ==
      getLdStOffsetOp(Paired).getImm() + OffsetStride) {
    RtMI = Paired;
    Rt2MI = I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = I;
    Rt2MI = Paired;
  }
  // Handle Unscaled: convert the byte offset to the scaled element offset
  // expected by the pair instruction.
  int OffsetImm = getLdStOffsetOp(RtMI).getImm();
  if (IsUnscaled && EnableAArch64UnscaledMemOp)
    OffsetImm /= OffsetStride;

  // Construct the new instruction.
  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
                                    I->getDebugLoc(), TII->get(NewOpc))
                                .addOperand(getLdStRegOp(RtMI))
                                .addOperand(getLdStRegOp(Rt2MI))
                                .addOperand(BaseRegOp)
                                .addImm(OffsetImm);
  (void)MIB;

  // FIXME: Do we need/want to copy the mem operands from the source
  // instructions? Probably. What uses them after this?

  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Paired->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %W1<def> = KILL %W1, %X1<imp-def>
    // %X1<def> = SBFMXri %X1<kill>, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    unsigned DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    DEBUG(dbgs() << "  Extend operand:\n    ");
    DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  } else {
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  }

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}
563
564/// trackRegDefsUses - Remember what registers the specified instruction uses
565/// and modifies.
Pete Cooper7be8f8f2015-08-03 19:04:32 +0000566static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
Tim Northover3b0846e2014-05-24 12:50:23 +0000567 BitVector &UsedRegs,
568 const TargetRegisterInfo *TRI) {
Pete Cooper7be8f8f2015-08-03 19:04:32 +0000569 for (const MachineOperand &MO : MI->operands()) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000570 if (MO.isRegMask())
571 ModifiedRegs.setBitsNotInMask(MO.getRegMask());
572
573 if (!MO.isReg())
574 continue;
575 unsigned Reg = MO.getReg();
576 if (MO.isDef()) {
577 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
578 ModifiedRegs.set(*AI);
579 } else {
580 assert(MO.isUse() && "Reg operand not a def and not a use?!?");
581 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
582 UsedRegs.set(*AI);
583 }
584 }
585}
586
// True if Offset fits the signed 7-bit element range of a pair instruction.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  int ElemOffset = IsUnscaled ? Offset / OffsetStride : Offset;
  return -64 <= ElemOffset && ElemOffset <= 63;
}
595
// Round Num up to the next multiple of PowOf2. Specialized to powers of 2
// and signed ints, avoiding a C-style cast from uint64_t to int that using
// RoundUpToAlignment from include/llvm/Support/MathExtras.h would require.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  int Mask = PowOf2 - 1;
  return (Num + Mask) & ~Mask;
}
603
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000604static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
605 const AArch64InstrInfo *TII) {
606 // One of the instructions must modify memory.
607 if (!MIa->mayStore() && !MIb->mayStore())
608 return false;
609
610 // Both instructions must be memory operations.
611 if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
612 return false;
613
614 return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
615}
616
617static bool mayAlias(MachineInstr *MIa,
618 SmallVectorImpl<MachineInstr *> &MemInsns,
619 const AArch64InstrInfo *TII) {
620 for (auto &MIb : MemInsns)
621 if (mayAlias(MIa, MIb, TII))
622 return true;
623
624 return false;
625}
626
Tim Northover3b0846e2014-05-24 12:50:23 +0000627/// findMatchingInsn - Scan the instructions looking for a load/store that can
628/// be combined with the current instruction into a load/store pair.
629MachineBasicBlock::iterator
630AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Chad Rosier96a18a92015-07-21 17:42:04 +0000631 LdStPairFlags &Flags,
Quentin Colombet66b61632015-03-06 22:42:10 +0000632 unsigned Limit) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000633 MachineBasicBlock::iterator E = I->getParent()->end();
634 MachineBasicBlock::iterator MBBI = I;
635 MachineInstr *FirstMI = I;
636 ++MBBI;
637
Matthias Braunfa3872e2015-05-18 20:27:55 +0000638 unsigned Opc = FirstMI->getOpcode();
Tilmann Scheller4aad3bd2014-06-04 12:36:28 +0000639 bool MayLoad = FirstMI->mayLoad();
Chad Rosier22eb7102015-08-06 17:37:18 +0000640 bool IsUnscaled = isUnscaledLdSt(FirstMI);
Chad Rosierf77e9092015-08-06 15:50:12 +0000641 unsigned Reg = getLdStRegOp(FirstMI).getReg();
642 unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
643 int Offset = getLdStOffsetOp(FirstMI).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +0000644
645 // Early exit if the first instruction modifies the base register.
646 // e.g., ldr x0, [x0]
Tim Northover3b0846e2014-05-24 12:50:23 +0000647 if (FirstMI->modifiesRegister(BaseReg, TRI))
648 return E;
Chad Rosiercaed6db2015-08-10 17:17:19 +0000649
650 // Early exit if the offset if not possible to match. (6 bits of positive
651 // range, plus allow an extra one in case we find a later insn that matches
652 // with Offset-1)
Tim Northover3b0846e2014-05-24 12:50:23 +0000653 int OffsetStride =
Chad Rosier32d4d372015-09-29 16:07:32 +0000654 IsUnscaled && EnableAArch64UnscaledMemOp ? getMemScale(FirstMI) : 1;
Tim Northover3b0846e2014-05-24 12:50:23 +0000655 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
656 return E;
657
658 // Track which registers have been modified and used between the first insn
659 // (inclusive) and the second insn.
660 BitVector ModifiedRegs, UsedRegs;
661 ModifiedRegs.resize(TRI->getNumRegs());
662 UsedRegs.resize(TRI->getNumRegs());
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000663
664 // Remember any instructions that read/write memory between FirstMI and MI.
665 SmallVector<MachineInstr *, 4> MemInsns;
666
Tim Northover3b0846e2014-05-24 12:50:23 +0000667 for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
668 MachineInstr *MI = MBBI;
669 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
670 // optimization by changing how far we scan.
671 if (MI->isDebugValue())
672 continue;
673
674 // Now that we know this is a real instruction, count it.
675 ++Count;
676
Chad Rosier08ef4622015-09-03 16:41:28 +0000677 bool CanMergeOpc = Opc == MI->getOpcode();
678 Flags.setSExtIdx(-1);
679 if (!CanMergeOpc) {
680 bool IsValidLdStrOpc;
681 unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
682 assert(IsValidLdStrOpc &&
683 "Given Opc should be a Load or Store with an immediate");
684 // Opc will be the first instruction in the pair.
685 Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
686 CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
687 }
688
689 if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
Chad Rosierc56a9132015-08-10 18:42:45 +0000690 assert(MI->mayLoadOrStore() && "Expected memory operation.");
Tim Northover3b0846e2014-05-24 12:50:23 +0000691 // If we've found another instruction with the same opcode, check to see
692 // if the base and offset are compatible with our starting instruction.
693 // These instructions all have scaled immediate operands, so we just
694 // check for +1/-1. Make sure to check the new instruction offset is
695 // actually an immediate and not a symbolic reference destined for
696 // a relocation.
697 //
698 // Pairwise instructions have a 7-bit signed offset field. Single insns
699 // have a 12-bit unsigned offset field. To be a valid combine, the
700 // final offset must be in range.
Chad Rosierf77e9092015-08-06 15:50:12 +0000701 unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
702 int MIOffset = getLdStOffsetOp(MI).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +0000703 if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
704 (Offset + OffsetStride == MIOffset))) {
705 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
706 // If this is a volatile load/store that otherwise matched, stop looking
707 // as something is going on that we don't have enough information to
708 // safely transform. Similarly, stop if we see a hint to avoid pairs.
709 if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
710 return E;
711 // If the resultant immediate offset of merging these instructions
712 // is out of range for a pairwise instruction, bail and keep looking.
Chad Rosier08ef4622015-09-03 16:41:28 +0000713 bool MIIsUnscaled = isUnscaledLdSt(MI);
714 if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000715 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
Chad Rosierc56a9132015-08-10 18:42:45 +0000716 MemInsns.push_back(MI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000717 continue;
718 }
719 // If the alignment requirements of the paired (scaled) instruction
720 // can't express the offset of the unscaled input, bail and keep
721 // looking.
722 if (IsUnscaled && EnableAArch64UnscaledMemOp &&
723 (alignTo(MinOffset, OffsetStride) != MinOffset)) {
724 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
Chad Rosierc56a9132015-08-10 18:42:45 +0000725 MemInsns.push_back(MI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000726 continue;
727 }
728 // If the destination register of the loads is the same register, bail
729 // and keep looking. A load-pair instruction with both destination
730 // registers the same is UNPREDICTABLE and will result in an exception.
Chad Rosierf77e9092015-08-06 15:50:12 +0000731 if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000732 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
Chad Rosierc56a9132015-08-10 18:42:45 +0000733 MemInsns.push_back(MI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000734 continue;
735 }
736
737 // If the Rt of the second instruction was not modified or used between
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000738 // the two instructions and none of the instructions between the second
739 // and first alias with the second, we can combine the second into the
740 // first.
Chad Rosierf77e9092015-08-06 15:50:12 +0000741 if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
742 !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000743 !mayAlias(MI, MemInsns, TII)) {
Chad Rosier96a18a92015-07-21 17:42:04 +0000744 Flags.setMergeForward(false);
Tim Northover3b0846e2014-05-24 12:50:23 +0000745 return MBBI;
746 }
747
748 // Likewise, if the Rt of the first instruction is not modified or used
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000749 // between the two instructions and none of the instructions between the
750 // first and the second alias with the first, we can combine the first
751 // into the second.
Chad Rosierf77e9092015-08-06 15:50:12 +0000752 if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
Chad Rosier5f668e12015-09-03 14:19:43 +0000753 !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000754 !mayAlias(FirstMI, MemInsns, TII)) {
Chad Rosier96a18a92015-07-21 17:42:04 +0000755 Flags.setMergeForward(true);
Tim Northover3b0846e2014-05-24 12:50:23 +0000756 return MBBI;
757 }
758 // Unable to combine these instructions due to interference in between.
759 // Keep looking.
760 }
761 }
762
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000763 // If the instruction wasn't a matching load or store. Stop searching if we
764 // encounter a call instruction that might modify memory.
765 if (MI->isCall())
Tim Northover3b0846e2014-05-24 12:50:23 +0000766 return E;
767
768 // Update modified / uses register lists.
769 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
770
771 // Otherwise, if the base register is modified, we have no match, so
772 // return early.
773 if (ModifiedRegs[BaseReg])
774 return E;
Chad Rosierce8e5ab2015-05-21 21:36:46 +0000775
776 // Update list of instructions that read/write memory.
777 if (MI->mayLoadOrStore())
778 MemInsns.push_back(MI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000779 }
780 return E;
781}
782
// Fold a base-register update (ADDXri/SUBXri of the base register) into the
// memory instruction I, rewriting I as the equivalent pre- or post-indexed
// form selected by \p IsPreIdx. Both original instructions are erased.
// Returns an iterator to the instruction following the merged instruction,
// suitable for continuing the caller's scan of the block.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  // A SUB decrements the base register, so the folded writeback amount is
  // the negated immediate.
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  if (!isPairedLdSt(I)) {
    // Non-paired instruction. Writeback forms take the base register as an
    // extra (first) def, then the original reg/base operands and the
    // unscaled immediate.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value);
  } else {
    // Paired instruction. The pre/post-indexed paired forms encode a scaled
    // immediate, so divide the byte offset by the access size.
    int Scale = getMemScale(I);
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I, 0))
              .addOperand(getLdStRegOp(I, 1))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value / Scale);
  }
  // MIB is only referenced below inside DEBUG(); silence unused-variable
  // warnings in release builds.
  (void)MIB;

  if (IsPreIdx)
    DEBUG(dbgs() << "Creating pre-indexed load/store.");
  else
    DEBUG(dbgs() << "Creating post-indexed load/store.");
  DEBUG(dbgs() << "    Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}
843
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000844bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
845 MachineInstr *MI,
846 unsigned BaseReg, int Offset) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000847 switch (MI->getOpcode()) {
848 default:
849 break;
850 case AArch64::SUBXri:
851 // Negate the offset for a SUB instruction.
852 Offset *= -1;
853 // FALLTHROUGH
854 case AArch64::ADDXri:
855 // Make sure it's a vanilla immediate operand, not a relocation or
856 // anything else we can't handle.
857 if (!MI->getOperand(2).isImm())
858 break;
859 // Watch out for 1 << 12 shifted value.
860 if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
861 break;
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000862
863 // The update instruction source and destination register must be the
864 // same as the load/store base register.
865 if (MI->getOperand(0).getReg() != BaseReg ||
866 MI->getOperand(1).getReg() != BaseReg)
867 break;
868
869 bool IsPairedInsn = isPairedLdSt(MemMI);
870 int UpdateOffset = MI->getOperand(2).getImm();
871 // For non-paired load/store instructions, the immediate must fit in a
872 // signed 9-bit integer.
873 if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
874 break;
875
876 // For paired load/store instructions, the immediate must be a multiple of
877 // the scaling factor. The scaled offset must also fit into a signed 7-bit
878 // integer.
879 if (IsPairedInsn) {
Chad Rosier32d4d372015-09-29 16:07:32 +0000880 int Scale = getMemScale(MemMI);
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000881 if (UpdateOffset % Scale != 0)
882 break;
883
884 int ScaledOffset = UpdateOffset / Scale;
885 if (ScaledOffset > 64 || ScaledOffset < -64)
886 break;
Tim Northover3b0846e2014-05-24 12:50:23 +0000887 }
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000888
889 // If we have a non-zero Offset, we check that it matches the amount
890 // we're adding to the register.
891 if (!Offset || Offset == MI->getOperand(2).getImm())
892 return true;
Tim Northover3b0846e2014-05-24 12:50:23 +0000893 break;
894 }
895 return false;
896}
897
898MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
899 MachineBasicBlock::iterator I, unsigned Limit, int Value) {
900 MachineBasicBlock::iterator E = I->getParent()->end();
901 MachineInstr *MemMI = I;
902 MachineBasicBlock::iterator MBBI = I;
Tim Northover3b0846e2014-05-24 12:50:23 +0000903
Chad Rosierf77e9092015-08-06 15:50:12 +0000904 unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
Chad Rosier32d4d372015-09-29 16:07:32 +0000905 int Offset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000906
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000907 // If the base register overlaps a destination register, we can't
Tim Northover3b0846e2014-05-24 12:50:23 +0000908 // merge the update.
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000909 bool IsPairedInsn = isPairedLdSt(MemMI);
910 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
911 unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
912 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
913 return E;
914 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000915
916 // Scan forward looking for post-index opportunities.
917 // Updating instructions can't be formed if the memory insn already
918 // has an offset other than the value we're looking for.
919 if (Offset != Value)
920 return E;
921
922 // Track which registers have been modified and used between the first insn
923 // (inclusive) and the second insn.
924 BitVector ModifiedRegs, UsedRegs;
925 ModifiedRegs.resize(TRI->getNumRegs());
926 UsedRegs.resize(TRI->getNumRegs());
927 ++MBBI;
928 for (unsigned Count = 0; MBBI != E; ++MBBI) {
929 MachineInstr *MI = MBBI;
930 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
931 // optimization by changing how far we scan.
932 if (MI->isDebugValue())
933 continue;
934
935 // Now that we know this is a real instruction, count it.
936 ++Count;
937
938 // If we found a match, return it.
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000939 if (isMatchingUpdateInsn(I, MI, BaseReg, Value))
Tim Northover3b0846e2014-05-24 12:50:23 +0000940 return MBBI;
941
942 // Update the status of what the instruction clobbered and used.
943 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
944
945 // Otherwise, if the base register is used or modified, we have no match, so
946 // return early.
947 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
948 return E;
949 }
950 return E;
951}
952
953MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
954 MachineBasicBlock::iterator I, unsigned Limit) {
955 MachineBasicBlock::iterator B = I->getParent()->begin();
956 MachineBasicBlock::iterator E = I->getParent()->end();
957 MachineInstr *MemMI = I;
958 MachineBasicBlock::iterator MBBI = I;
Tim Northover3b0846e2014-05-24 12:50:23 +0000959
Chad Rosierf77e9092015-08-06 15:50:12 +0000960 unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
961 int Offset = getLdStOffsetOp(MemMI).getImm();
Tim Northover3b0846e2014-05-24 12:50:23 +0000962
963 // If the load/store is the first instruction in the block, there's obviously
964 // not any matching update. Ditto if the memory offset isn't zero.
965 if (MBBI == B || Offset != 0)
966 return E;
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000967 // If the base register overlaps a destination register, we can't
Tim Northover3b0846e2014-05-24 12:50:23 +0000968 // merge the update.
Chad Rosier1bbd7fb2015-09-25 17:48:17 +0000969 bool IsPairedInsn = isPairedLdSt(MemMI);
970 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
971 unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
972 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
973 return E;
974 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000975
976 // Track which registers have been modified and used between the first insn
977 // (inclusive) and the second insn.
978 BitVector ModifiedRegs, UsedRegs;
979 ModifiedRegs.resize(TRI->getNumRegs());
980 UsedRegs.resize(TRI->getNumRegs());
981 --MBBI;
982 for (unsigned Count = 0; MBBI != B; --MBBI) {
983 MachineInstr *MI = MBBI;
984 // Skip DBG_VALUE instructions. Otherwise debug info can affect the
985 // optimization by changing how far we scan.
986 if (MI->isDebugValue())
987 continue;
988
989 // Now that we know this is a real instruction, count it.
990 ++Count;
991
992 // If we found a match, return it.
Chad Rosier11c825f2015-09-30 19:44:40 +0000993 if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
Tim Northover3b0846e2014-05-24 12:50:23 +0000994 return MBBI;
995
996 // Update the status of what the instruction clobbered and used.
997 trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
998
999 // Otherwise, if the base register is used or modified, we have no match, so
1000 // return early.
1001 if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
1002 return E;
1003 }
1004 return E;
1005}
1006
// Run both peephole transformations over a single basic block:
// pass 1 forms load/store pairs, pass 2 folds base-register updates into
// pre-/post-indexed forms. Returns true if the block was changed.
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;
  // Two transformations to do here:
  // 1) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 2) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  // Pass 1: pair formation.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      // If this is a volatile load/store, don't mess with it.
      if (MI->hasOrderedMemoryRef()) {
        ++MBBI;
        break;
      }
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Check if this load/store has a hint to avoid pair formation.
      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
      if (TII->isLdStPairSuppressed(MI)) {
        ++MBBI;
        break;
      }
      // Look ahead up to ScanLimit instructions for a pairable instruction.
      LdStPairFlags Flags;
      MachineBasicBlock::iterator Paired =
          findMatchingInsn(MBBI, Flags, ScanLimit);
      if (Paired != E) {
        ++NumPairCreated;
        if (isUnscaledLdSt(MI))
          ++NumUnscaledPairCreated;

        // Merge the loads into a pair. Keeping the iterator straight is a
        // pain, so we let the merge routine tell us what the next instruction
        // is after it's done mucking about.
        MBBI = mergePairedInsns(MBBI, Paired, Flags);
        Modified = true;
        break;
      }
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  // Pass 2: writeback (pre-/post-index) folding. Runs over the block again,
  // including the pairs created by pass 1.
  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::STRHHui:
    case AArch64::STRBBui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRHHui:
    case AArch64::LDRBBui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    // Paired instructions.
    case AArch64::LDPSi:
    case AArch64::LDPSWi:
    case AArch64::LDPDi:
    case AArch64::LDPQi:
    case AArch64::LDPWi:
    case AArch64::LDPXi:
    case AArch64::STPSi:
    case AArch64::STPDi:
    case AArch64::STPQi:
    case AArch64::STPWi:
    case AArch64::STPXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Look forward to try to form a post-index instruction. For example,
      // ldr x0, [x20]
      // add x20, x20, #32
      //   merged into:
      // ldr x0, [x20], #32
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions, so move to the next
      // instruction.
      if (isUnscaledLdSt(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      // add x0, x0, #8
      // ldr x1, [x0]
      //   merged into:
      // ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }
      // The immediate in the load/store is scaled by the size of the register
      // being loaded. The immediate in the add we're looking for,
      // however, is not, so adjust here.
      int Value = getLdStOffsetOp(MI).getImm() * getMemScale(MI);

      // Look forward for a matching update insn whose amount equals the
      // load/store's existing offset, folding it as a pre-index. For example,
      // ldr x1, [x0, #64]
      // add x0, x0, #64
      //   merged into:
      // ldr x1, [x0, #64]!
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
      // FIXME: Do the other instructions.
    }
  }

  return Modified;
}
1213
1214bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Eric Christopher6c901622015-01-28 03:51:33 +00001215 TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
1216 TRI = Fn.getSubtarget().getRegisterInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001217
1218 bool Modified = false;
1219 for (auto &MBB : Fn)
1220 Modified |= optimizeBlock(MBB);
1221
1222 return Modified;
1223}
1224
1225// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
1226// loads and stores near one another?
1227
Chad Rosier43f5c842015-08-05 12:40:13 +00001228/// createAArch64LoadStoreOptimizationPass - returns an instance of the
1229/// load / store optimization pass.
Tim Northover3b0846e2014-05-24 12:50:23 +00001230FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
1231 return new AArch64LoadStoreOpt();
1232}