//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "aarch64-ldst-opt"

/// AArch64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.

STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
          "Number of load/store pairs generated from unscaled instructions");
STATISTIC(NumSmallTypeMerged, "Number of small type loads merged");

static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                   cl::init(20), cl::Hidden);

namespace llvm {
void initializeAArch64LoadStoreOptPass(PassRegistry &);
}

#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"

namespace {

typedef struct LdStPairFlags {
  // If a matching instruction is found, MergeForward is set to true if the
  // merge is to remove the first instruction and replace the second with
  // a pair-wise insn, and false if the reverse is true.
  bool MergeForward;

  // SExtIdx gives the index of the result of the load pair that must be
  // extended. The value of SExtIdx assumes that the paired load produces the
  // value in this order: (I, returned iterator), i.e., -1 means no value has
  // to be extended, 0 means I, and 1 means the returned iterator.
  int SExtIdx;

  LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}

  void setMergeForward(bool V = true) { MergeForward = V; }
  bool getMergeForward() const { return MergeForward; }

  void setSExtIdx(int V) { SExtIdx = V; }
  int getSExtIdx() const { return SExtIdx; }

} LdStPairFlags;
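
// Worked example (for orientation; see mergePairedInsns below): pairing
//   ldrsw x0, [x2]
//   ldr   w1, [x2, #4]
// uses the non-sign-extending opcode for the pair, and SExtIdx records which
// result of the new ldp must be sign-extended back to 64 bits afterwards.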

struct AArch64LoadStoreOpt : public MachineFunctionPass {
  static char ID;
  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
    initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const AArch64Subtarget *Subtarget;

  // Scan the instructions looking for a load/store that can be combined
  // with the current instruction into a load/store pair.
  // Return the matching instruction if one is found, else MBB->end().
  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
                                               LdStPairFlags &Flags,
                                               unsigned Limit);
  // Merge the two instructions indicated into a single pair-wise instruction.
  // If MergeForward is true, erase the first instruction and fold its
  // operation into the second. If false, the reverse. Return the instruction
  // following the first instruction (which may change during processing).
  MachineBasicBlock::iterator
  mergePairedInsns(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Paired,
                   const LdStPairFlags &Flags);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan forwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
                                int UnscaledOffset);

  // Scan the instruction list to find a base register update that can
  // be combined with the current instruction (a load or store) using
  // pre or post indexed addressing with writeback. Scan backwards.
  MachineBasicBlock::iterator
  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);

  // Find an instruction that updates the base register of the ld/st
  // instruction.
  bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
                            unsigned BaseReg, int Offset);

  // Merge a pre- or post-index base register update into a ld/st instruction.
  MachineBasicBlock::iterator
  mergeUpdateInsn(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator Update, bool IsPreIdx);

  // Find and merge foldable ldr/str instructions.
  bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);

  // Check if converting two narrow loads into a single wider load with
  // bitfield extracts could be enabled.
  bool enableNarrowLdMerge(MachineFunction &Fn);

  bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return AARCH64_LOAD_STORE_OPT_NAME;
  }
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace

INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                AARCH64_LOAD_STORE_OPT_NAME, false, false)

static bool isUnscaledLdSt(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::STURSi:
  case AArch64::STURDi:
  case AArch64::STURQi:
  case AArch64::STURWi:
  case AArch64::STURXi:
  case AArch64::LDURSi:
  case AArch64::LDURDi:
  case AArch64::LDURQi:
  case AArch64::LDURWi:
  case AArch64::LDURXi:
  case AArch64::LDURSWi:
  case AArch64::LDURHHi:
    return true;
  }
}

static bool isUnscaledLdSt(MachineInstr *MI) {
  return isUnscaledLdSt(MI->getOpcode());
}
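
// For context: the scaled "ui" forms take an unsigned 12-bit immediate in
// units of the access size, while the unscaled forms above take a signed
// 9-bit byte offset. E.g., ldr x0, [x1, #8] (LDRXui) encodes immediate 1,
// whereas ldur x0, [x1, #-8] (LDURXi) encodes immediate -8.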

static bool isSmallTypeLdMerge(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
    return true;
  // FIXME: Add other instructions (e.g., LDRBBui, LDURSHWi, LDRSHWui, etc.).
  }
}
static bool isSmallTypeLdMerge(MachineInstr *MI) {
  return isSmallTypeLdMerge(MI->getOpcode());
}

// Scaling factor for a load or store: the size of the memory access in bytes.
static int getMemScale(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    llvm_unreachable("Opcode has unknown scale!");
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    return 1;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::STRHHui:
    return 2;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPWi:
  case AArch64::STPSi:
  case AArch64::STPWi:
    return 4;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDPDi:
  case AArch64::LDPXi:
  case AArch64::STPDi:
  case AArch64::STPXi:
    return 8;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::LDPQi:
  case AArch64::STPQi:
    return 16;
  }
}
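
// The scale is also the unit of the scaled-form immediates, so byte offsets
// are recovered by multiplying, as in findMatchingUpdateInsnForward below:
//   getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI)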

static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                         bool *IsValidLdStrOpc = nullptr) {
  if (IsValidLdStrOpc)
    *IsValidLdStrOpc = true;
  switch (Opc) {
  default:
    if (IsValidLdStrOpc)
      *IsValidLdStrOpc = false;
    return UINT_MAX;
  case AArch64::STRDui:
  case AArch64::STURDi:
  case AArch64::STRQui:
  case AArch64::STURQi:
  case AArch64::STRWui:
  case AArch64::STURWi:
  case AArch64::STRXui:
  case AArch64::STURXi:
  case AArch64::LDRDui:
  case AArch64::LDURDi:
  case AArch64::LDRQui:
  case AArch64::LDURQi:
  case AArch64::LDRWui:
  case AArch64::LDURWi:
  case AArch64::LDRXui:
  case AArch64::LDURXi:
  case AArch64::STRSui:
  case AArch64::STURSi:
  case AArch64::LDRSui:
  case AArch64::LDURSi:
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
    return Opc;
  case AArch64::LDRSWui:
    return AArch64::LDRWui;
  case AArch64::LDURSWi:
    return AArch64::LDURWi;
  }
}

static unsigned getMatchingPairOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pairwise equivalent!");
  case AArch64::STRSui:
  case AArch64::STURSi:
    return AArch64::STPSi;
  case AArch64::STRDui:
  case AArch64::STURDi:
    return AArch64::STPDi;
  case AArch64::STRQui:
  case AArch64::STURQi:
    return AArch64::STPQi;
  case AArch64::STRWui:
  case AArch64::STURWi:
    return AArch64::STPWi;
  case AArch64::STRXui:
  case AArch64::STURXi:
    return AArch64::STPXi;
  case AArch64::LDRSui:
  case AArch64::LDURSi:
    return AArch64::LDPSi;
  case AArch64::LDRDui:
  case AArch64::LDURDi:
    return AArch64::LDPDi;
  case AArch64::LDRQui:
  case AArch64::LDURQi:
    return AArch64::LDPQi;
  case AArch64::LDRWui:
  case AArch64::LDURWi:
    return AArch64::LDPWi;
  case AArch64::LDRXui:
  case AArch64::LDURXi:
    return AArch64::LDPXi;
  case AArch64::LDRSWui:
  case AArch64::LDURSWi:
    return AArch64::LDPSWi;
  case AArch64::LDRHHui:
    return AArch64::LDRWui;
  case AArch64::LDURHHi:
    return AArch64::LDURWi;
  }
}
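
// Note the halfword cases: there is no 16-bit ldp, so two adjacent LDRHHui
// (or LDURHHi) loads are instead merged into one 32-bit LDRWui (LDURWi)
// load plus bitfield extracts; see the example in optimizeBlock.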

static unsigned getPreIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no pre-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpre;
  case AArch64::STRDui:
    return AArch64::STRDpre;
  case AArch64::STRQui:
    return AArch64::STRQpre;
  case AArch64::STRBBui:
    return AArch64::STRBBpre;
  case AArch64::STRHHui:
    return AArch64::STRHHpre;
  case AArch64::STRWui:
    return AArch64::STRWpre;
  case AArch64::STRXui:
    return AArch64::STRXpre;
  case AArch64::LDRSui:
    return AArch64::LDRSpre;
  case AArch64::LDRDui:
    return AArch64::LDRDpre;
  case AArch64::LDRQui:
    return AArch64::LDRQpre;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpre;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpre;
  case AArch64::LDRWui:
    return AArch64::LDRWpre;
  case AArch64::LDRXui:
    return AArch64::LDRXpre;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpre;
  case AArch64::LDPSi:
    return AArch64::LDPSpre;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpre;
  case AArch64::LDPDi:
    return AArch64::LDPDpre;
  case AArch64::LDPQi:
    return AArch64::LDPQpre;
  case AArch64::LDPWi:
    return AArch64::LDPWpre;
  case AArch64::LDPXi:
    return AArch64::LDPXpre;
  case AArch64::STPSi:
    return AArch64::STPSpre;
  case AArch64::STPDi:
    return AArch64::STPDpre;
  case AArch64::STPQi:
    return AArch64::STPQpre;
  case AArch64::STPWi:
    return AArch64::STPWpre;
  case AArch64::STPXi:
    return AArch64::STPXpre;
  }
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("Opcode has no post-indexed equivalent!");
  case AArch64::STRSui:
    return AArch64::STRSpost;
  case AArch64::STRDui:
    return AArch64::STRDpost;
  case AArch64::STRQui:
    return AArch64::STRQpost;
  case AArch64::STRBBui:
    return AArch64::STRBBpost;
  case AArch64::STRHHui:
    return AArch64::STRHHpost;
  case AArch64::STRWui:
    return AArch64::STRWpost;
  case AArch64::STRXui:
    return AArch64::STRXpost;
  case AArch64::LDRSui:
    return AArch64::LDRSpost;
  case AArch64::LDRDui:
    return AArch64::LDRDpost;
  case AArch64::LDRQui:
    return AArch64::LDRQpost;
  case AArch64::LDRBBui:
    return AArch64::LDRBBpost;
  case AArch64::LDRHHui:
    return AArch64::LDRHHpost;
  case AArch64::LDRWui:
    return AArch64::LDRWpost;
  case AArch64::LDRXui:
    return AArch64::LDRXpost;
  case AArch64::LDRSWui:
    return AArch64::LDRSWpost;
  case AArch64::LDPSi:
    return AArch64::LDPSpost;
  case AArch64::LDPSWi:
    return AArch64::LDPSWpost;
  case AArch64::LDPDi:
    return AArch64::LDPDpost;
  case AArch64::LDPQi:
    return AArch64::LDPQpost;
  case AArch64::LDPWi:
    return AArch64::LDPWpost;
  case AArch64::LDPXi:
    return AArch64::LDPXpost;
  case AArch64::STPSi:
    return AArch64::STPSpost;
  case AArch64::STPDi:
    return AArch64::STPDpost;
  case AArch64::STPQi:
    return AArch64::STPQpost;
  case AArch64::STPWi:
    return AArch64::STPWpost;
  case AArch64::STPXi:
    return AArch64::STPXpost;
  }
}

static bool isPairedLdSt(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return false;
  case AArch64::LDPSi:
  case AArch64::LDPSWi:
  case AArch64::LDPDi:
  case AArch64::LDPQi:
  case AArch64::LDPWi:
  case AArch64::LDPXi:
  case AArch64::STPSi:
  case AArch64::STPDi:
  case AArch64::STPQi:
  case AArch64::STPWi:
  case AArch64::STPXi:
    return true;
  }
}

static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
                                          unsigned PairedRegOp = 0) {
  assert(PairedRegOp < 2 && "Unexpected register operand idx.");
  unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
  return MI->getOperand(Idx);
}

static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
  return MI->getOperand(Idx);
}

static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
  unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
  return MI->getOperand(Idx);
}
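
// Operand layout assumed by the helpers above:
//   ldr x0, [x1, #8]       -> (Rt, base, offset)      = operands 0, 1, 2
//   ldp x0, x1, [x2, #16]  -> (Rt, Rt2, base, offset) = operands 0, 1, 2, 3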

// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
                      (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
      std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator Paired,
                                      const LdStPairFlags &Flags) {
  MachineBasicBlock::iterator NextI = I;
  ++NextI;
  // If NextI is the second of the two instructions to be merged, we need
  // to skip one further. Either way, the merge will invalidate the iterator,
  // and we don't need to scan the new instruction, as it's a pairwise
  // instruction, which we're not considering for further action anyway.
  if (NextI == Paired)
    ++NextI;

  int SExtIdx = Flags.getSExtIdx();
  unsigned Opc =
      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
  bool IsUnscaled = isUnscaledLdSt(Opc);
  int OffsetStride = IsUnscaled ? getMemScale(I) : 1;

  bool MergeForward = Flags.getMergeForward();
  unsigned NewOpc = getMatchingPairOpcode(Opc);
  // Insert our new paired instruction after whichever of the paired
  // instructions MergeForward indicates.
  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
  // Also based on MergeForward is from where we copy the base register operand
  // so we get the flags compatible with the input code.
  const MachineOperand &BaseRegOp =
      MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);

  // Which register is Rt and which is Rt2 depends on the offset order.
  MachineInstr *RtMI, *Rt2MI;
  if (getLdStOffsetOp(I).getImm() ==
      getLdStOffsetOp(Paired).getImm() + OffsetStride) {
    RtMI = Paired;
    Rt2MI = I;
    // Here we swapped the assumption made for SExtIdx.
    // I.e., we turn ldp I, Paired into ldp Paired, I.
    // Update the index accordingly.
    if (SExtIdx != -1)
      SExtIdx = (SExtIdx + 1) % 2;
  } else {
    RtMI = I;
    Rt2MI = Paired;
  }

  int OffsetImm = getLdStOffsetOp(RtMI).getImm();

  if (isSmallTypeLdMerge(Opc)) {
    // Change the scaled offset from small to large type.
    if (!IsUnscaled)
      OffsetImm /= 2;
    MachineInstr *RtNewDest = MergeForward ? I : Paired;
    // When merging small (< 32 bit) loads for big-endian targets, the order of
    // the component parts gets swapped.
    if (!Subtarget->isLittleEndian())
      std::swap(RtMI, Rt2MI);
    // Construct the new load instruction.
    // FIXME: currently we support only halfword unsigned load. We need to
    // handle byte type, signed, and store instructions as well.
    MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
    NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                       TII->get(NewOpc))
                   .addOperand(getLdStRegOp(RtNewDest))
                   .addOperand(BaseRegOp)
                   .addImm(OffsetImm);

    // Copy MachineMemOperands from the original loads.
    concatenateMemOperands(NewMemMI, I, Paired);

    DEBUG(
        dbgs()
        << "Creating the new load and extract. Replacing instructions:\n    ");
    DEBUG(I->print(dbgs()));
    DEBUG(dbgs() << "    ");
    DEBUG(Paired->print(dbgs()));
    DEBUG(dbgs() << "  with instructions:\n    ");
    DEBUG((NewMemMI)->print(dbgs()));

    MachineInstr *ExtDestMI = MergeForward ? Paired : I;
    if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
      // Create the bitfield extract for high half.
      BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                          TII->get(AArch64::UBFMWri))
                      .addOperand(getLdStRegOp(Rt2MI))
                      .addReg(getLdStRegOp(RtNewDest).getReg())
                      .addImm(16)
                      .addImm(31);
      // Create the bitfield extract for low half.
      BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                          TII->get(AArch64::ANDWri))
                      .addOperand(getLdStRegOp(RtMI))
                      .addReg(getLdStRegOp(RtNewDest).getReg())
                      .addImm(15);
    } else {
      // Create the bitfield extract for low half.
      BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                          TII->get(AArch64::ANDWri))
                      .addOperand(getLdStRegOp(RtMI))
                      .addReg(getLdStRegOp(RtNewDest).getReg())
                      .addImm(15);
      // Create the bitfield extract for high half.
      BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                          TII->get(AArch64::UBFMWri))
                      .addOperand(getLdStRegOp(Rt2MI))
                      .addReg(getLdStRegOp(RtNewDest).getReg())
                      .addImm(16)
                      .addImm(31);
    }
    DEBUG(dbgs() << "    ");
    DEBUG((BitExtMI1)->print(dbgs()));
    DEBUG(dbgs() << "    ");
    DEBUG((BitExtMI2)->print(dbgs()));
    DEBUG(dbgs() << "\n");

    // Erase the old instructions.
    I->eraseFromParent();
    Paired->eraseFromParent();
    return NextI;
  }

  // Handle unscaled instructions: convert the byte offset to the
  // element-sized offset expected by the paired form.
  if (IsUnscaled)
    OffsetImm /= OffsetStride;

  // Construct the new instruction.
  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
                                    I->getDebugLoc(), TII->get(NewOpc))
                                .addOperand(getLdStRegOp(RtMI))
                                .addOperand(getLdStRegOp(Rt2MI))
                                .addOperand(BaseRegOp)
                                .addImm(OffsetImm);
  (void)MIB;

  // FIXME: Do we need/want to copy the mem operands from the source
  // instructions? Probably. What uses them after this?

  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Paired->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");

  if (SExtIdx != -1) {
    // Generate the sign extension for the proper result of the ldp.
    // I.e., with X1, that would be:
    // %W1<def> = KILL %W1, %X1<imp-def>
    // %X1<def> = SBFMXri %X1<kill>, 0, 31
    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
    // Right now, DstMO has the extended register, since it comes from an
    // extended opcode.
    unsigned DstRegX = DstMO.getReg();
    // Get the W variant of that register.
    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
    // Update the result of LDP to use the W instead of the X variant.
    DstMO.setReg(DstRegW);
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
    // Make the machine verifier happy by providing a definition for
    // the X register.
    // Insert this definition right after the generated LDP, i.e., before
    // InsertionPoint.
    MachineInstrBuilder MIBKill =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(TargetOpcode::KILL), DstRegW)
            .addReg(DstRegW)
            .addReg(DstRegX, RegState::Define);
    MIBKill->getOperand(2).setImplicit();
    // Create the sign extension.
    MachineInstrBuilder MIBSXTW =
        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
                TII->get(AArch64::SBFMXri), DstRegX)
            .addReg(DstRegX)
            .addImm(0)
            .addImm(31);
    (void)MIBSXTW;
    DEBUG(dbgs() << "  Extend operand:\n    ");
    DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  } else {
    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
    DEBUG(dbgs() << "\n");
  }

  // Erase the old instructions.
  I->eraseFromParent();
  Paired->eraseFromParent();

  return NextI;
}

/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
                             BitVector &UsedRegs,
                             const TargetRegisterInfo *TRI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isRegMask())
      ModifiedRegs.setBitsNotInMask(MO.getRegMask());

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (MO.isDef()) {
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        ModifiedRegs.set(*AI);
    } else {
      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        UsedRegs.set(*AI);
    }
  }
}

static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  // Convert the byte-offset used by unscaled into an "element" offset used
  // by the scaled pair load/store instructions.
  if (IsUnscaled)
    Offset /= OffsetStride;

  return Offset <= 63 && Offset >= -64;
}
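
// The [-64, 63] element range is the signed 7-bit immediate field of the
// paired forms. In bytes the range scales with the access size, e.g. for
// X-register pairs (scale 8) byte offsets from -512 to 504 can be paired.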

// Do alignment, specialized to power of 2 and for signed ints,
// avoiding having to do a C-style cast from uint64_t to int when
// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
}
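
// For example, alignTo(5, 4) == 8, alignTo(8, 4) == 8, and for negative
// values it also rounds up: alignTo(-3, 4) == 0.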

static bool mayAlias(MachineInstr *MIa, MachineInstr *MIb,
                     const AArch64InstrInfo *TII) {
  // One of the instructions must modify memory.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // Both instructions must be memory operations.
  if (!MIa->mayLoadOrStore() && !MIb->mayLoadOrStore())
    return false;

  return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
}

static bool mayAlias(MachineInstr *MIa,
                     SmallVectorImpl<MachineInstr *> &MemInsns,
                     const AArch64InstrInfo *TII) {
  for (auto &MIb : MemInsns)
    if (mayAlias(MIa, MIb, TII))
      return true;

  return false;
}

/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
                                      LdStPairFlags &Flags, unsigned Limit) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  MachineInstr *FirstMI = I;
  ++MBBI;

  unsigned Opc = FirstMI->getOpcode();
  bool MayLoad = FirstMI->mayLoad();
  bool IsUnscaled = isUnscaledLdSt(FirstMI);
  unsigned Reg = getLdStRegOp(FirstMI).getReg();
  unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
  int Offset = getLdStOffsetOp(FirstMI).getImm();

  // Early exit if the first instruction modifies the base register.
  // e.g., ldr x0, [x0]
  if (FirstMI->modifiesRegister(BaseReg, TRI))
    return E;

  // Early exit if the offset is not possible to match. (6 bits of positive
  // range, plus allow an extra one in case we find a later insn that matches
  // with Offset-1.)
  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
  if (!isSmallTypeLdMerge(Opc) &&
      !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
    return E;

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());

  // Remember any instructions that read/write memory between FirstMI and MI.
  SmallVector<MachineInstr *, 4> MemInsns;

  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    bool CanMergeOpc = Opc == MI->getOpcode();
    Flags.setSExtIdx(-1);
    if (!CanMergeOpc) {
      bool IsValidLdStrOpc;
      unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
      assert(IsValidLdStrOpc &&
             "Given Opc should be a Load or Store with an immediate");
      // Opc will be the first instruction in the pair.
      Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
      CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
    }

    if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
      assert(MI->mayLoadOrStore() && "Expected memory operation.");
      // If we've found another instruction with the same opcode, check to see
      // if the base and offset are compatible with our starting instruction.
      // These instructions all have scaled immediate operands, so we just
      // check for +1/-1. Make sure to check the new instruction offset is
      // actually an immediate and not a symbolic reference destined for
      // a relocation.
      //
      // Pairwise instructions have a 7-bit signed offset field. Single insns
      // have a 12-bit unsigned offset field. To be a valid combine, the
      // final offset must be in range.
      unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
      int MIOffset = getLdStOffsetOp(MI).getImm();
      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
                                   (Offset + OffsetStride == MIOffset))) {
        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
        // If this is a volatile load/store that otherwise matched, stop
        // looking as something is going on that we don't have enough
        // information to safely transform. Similarly, stop if we see a hint
        // to avoid pairs.
        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
          return E;
        // If the resultant immediate offset of merging these instructions
        // is out of range for a pairwise instruction, bail and keep looking.
        bool MIIsUnscaled = isUnscaledLdSt(MI);
        bool IsSmallTypeLd = isSmallTypeLdMerge(MI->getOpcode());
        if (!IsSmallTypeLd &&
            !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          MemInsns.push_back(MI);
          continue;
        }

        if (IsSmallTypeLd) {
          // If the alignment requirements of the larger type scaled load
          // instruction can't express the scaled offset of the smaller type
          // input, bail and keep looking.
          if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
            trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
            MemInsns.push_back(MI);
            continue;
          }
        } else {
          // If the alignment requirements of the paired (scaled) instruction
          // can't express the offset of the unscaled input, bail and keep
          // looking.
          if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
            trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
            MemInsns.push_back(MI);
            continue;
          }
        }
        // If the destination register of the loads is the same register, bail
        // and keep looking. A load-pair instruction with both destination
        // registers the same is UNPREDICTABLE and will result in an exception.
        if (MayLoad && Reg == getLdStRegOp(MI).getReg()) {
          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
          MemInsns.push_back(MI);
          continue;
        }

        // If the Rt of the second instruction was not modified or used between
        // the two instructions and none of the instructions between the second
        // and first alias with the second, we can combine the second into the
        // first.
        if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
            !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
            !mayAlias(MI, MemInsns, TII)) {
          Flags.setMergeForward(false);
          return MBBI;
        }

        // Likewise, if the Rt of the first instruction is not modified or used
        // between the two instructions and none of the instructions between
        // the first and the second alias with the first, we can combine the
        // first into the second.
        if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
            !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
            !mayAlias(FirstMI, MemInsns, TII)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
        // Unable to combine these instructions due to interference in between.
        // Keep looking.
      }
    }

    // If the instruction wasn't a matching load or store, stop searching if we
    // encounter a call instruction that might modify memory.
    if (MI->isCall())
      return E;

    // Update modified / uses register lists.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is modified, we have no match, so
    // return early.
    if (ModifiedRegs[BaseReg])
      return E;

    // Update list of instructions that read/write memory.
    if (MI->mayLoadOrStore())
      MemInsns.push_back(MI);
  }
  return E;
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
                                     MachineBasicBlock::iterator Update,
                                     bool IsPreIdx) {
  assert((Update->getOpcode() == AArch64::ADDXri ||
          Update->getOpcode() == AArch64::SUBXri) &&
         "Unexpected base register update instruction to merge!");
  MachineBasicBlock::iterator NextI = I;
  // Return the instruction following the merged instruction, which is
  // the instruction following our unmerged load. Unless that's the add/sub
  // instruction we're merging, in which case it's the one after that.
  if (++NextI == Update)
    ++NextI;

  int Value = Update->getOperand(2).getImm();
  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
         "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
  if (Update->getOpcode() == AArch64::SUBXri)
    Value = -Value;

  unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
                             : getPostIndexedOpcode(I->getOpcode());
  MachineInstrBuilder MIB;
  if (!isPairedLdSt(I)) {
    // Non-paired instruction.
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value);
  } else {
    // Paired instruction.
    int Scale = getMemScale(I);
    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
              .addOperand(getLdStRegOp(Update))
              .addOperand(getLdStRegOp(I, 0))
              .addOperand(getLdStRegOp(I, 1))
              .addOperand(getLdStBaseOp(I))
              .addImm(Value / Scale);
  }
  (void)MIB;

  if (IsPreIdx)
    DEBUG(dbgs() << "Creating pre-indexed load/store.");
  else
    DEBUG(dbgs() << "Creating post-indexed load/store.");
  DEBUG(dbgs() << "    Replacing instructions:\n    ");
  DEBUG(I->print(dbgs()));
  DEBUG(dbgs() << "    ");
  DEBUG(Update->print(dbgs()));
  DEBUG(dbgs() << "  with instruction:\n    ");
  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
  DEBUG(dbgs() << "\n");

  // Erase the old instructions for the block.
  I->eraseFromParent();
  Update->eraseFromParent();

  return NextI;
}

bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
                                               MachineInstr *MI,
                                               unsigned BaseReg, int Offset) {
  switch (MI->getOpcode()) {
  default:
    break;
  case AArch64::SUBXri:
    // Negate the offset for a SUB instruction.
    Offset *= -1;
    // FALLTHROUGH
  case AArch64::ADDXri:
    // Make sure it's a vanilla immediate operand, not a relocation or
    // anything else we can't handle.
    if (!MI->getOperand(2).isImm())
      break;
    // Watch out for 1 << 12 shifted value.
    if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
      break;

    // The update instruction source and destination register must be the
    // same as the load/store base register.
    if (MI->getOperand(0).getReg() != BaseReg ||
        MI->getOperand(1).getReg() != BaseReg)
      break;

    bool IsPairedInsn = isPairedLdSt(MemMI);
    int UpdateOffset = MI->getOperand(2).getImm();
    // For non-paired load/store instructions, the immediate must fit in a
    // signed 9-bit integer.
    if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
      break;

    // For paired load/store instructions, the immediate must be a multiple of
    // the scaling factor. The scaled offset must also fit into a signed 7-bit
    // integer.
    if (IsPairedInsn) {
      int Scale = getMemScale(MemMI);
      if (UpdateOffset % Scale != 0)
        break;

      int ScaledOffset = UpdateOffset / Scale;
      if (ScaledOffset > 63 || ScaledOffset < -64)
        break;
    }

    // If we have a non-zero Offset, we check that it matches the amount
    // we're adding to the register.
    if (!Offset || Offset == MI->getOperand(2).getImm())
      return true;
    break;
  }
  return false;
}
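
// For example, with Offset 0 (a wildcard, per the check above),
//   ldr x0, [x2]
//   add x2, x2, #8
// matches and can be folded into "ldr x0, [x2], #8"; for paired instructions
// the add's immediate must additionally be a multiple of the access size.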

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
    MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);

  // Scan forward looking for post-index opportunities. Updating instructions
  // can't be formed if the memory instruction doesn't have the offset we're
  // looking for.
  if (MIUnscaledOffset != UnscaledOffset)
    return E;

  // If the base register overlaps a destination register, we can't
  // merge the update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  ++MBBI;
  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
    MachineBasicBlock::iterator I, unsigned Limit) {
  MachineBasicBlock::iterator B = I->getParent()->begin();
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineInstr *MemMI = I;
  MachineBasicBlock::iterator MBBI = I;

  unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
  int Offset = getLdStOffsetOp(MemMI).getImm();

  // If the load/store is the first instruction in the block, there's obviously
  // not any matching update. Ditto if the memory offset isn't zero.
  if (MBBI == B || Offset != 0)
    return E;
  // If the base register overlaps a destination register, we can't
  // merge the update.
  bool IsPairedInsn = isPairedLdSt(MemMI);
  for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
    unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
    if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
      return E;
  }

  // Track which registers have been modified and used between the first insn
  // (inclusive) and the second insn.
  BitVector ModifiedRegs, UsedRegs;
  ModifiedRegs.resize(TRI->getNumRegs());
  UsedRegs.resize(TRI->getNumRegs());
  --MBBI;
  for (unsigned Count = 0; MBBI != B && Count < Limit; --MBBI) {
    MachineInstr *MI = MBBI;
    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
    // optimization by changing how far we scan.
    if (MI->isDebugValue())
      continue;

    // Now that we know this is a real instruction, count it.
    ++Count;

    // If we found a match, return it.
    if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
      return MBBI;

    // Update the status of what the instruction clobbered and used.
    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);

    // Otherwise, if the base register is used or modified, we have no match,
    // so return early.
    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
      return E;
  }
  return E;
}

bool AArch64LoadStoreOpt::tryToMergeLdStInst(
    MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = MBBI;
  MachineBasicBlock::iterator E = MI->getParent()->end();
  // If this is a volatile load/store, don't mess with it.
  if (MI->hasOrderedMemoryRef())
    return false;

  // Make sure this is a reg+imm (as opposed to an address reloc).
  if (!getLdStOffsetOp(MI).isImm())
    return false;

  // Check if this load/store has a hint to avoid pair formation.
  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
  if (TII->isLdStPairSuppressed(MI))
    return false;

  // Look ahead up to ScanLimit instructions for a pairable instruction.
  LdStPairFlags Flags;
  MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit);
  if (Paired != E) {
    if (isSmallTypeLdMerge(MI)) {
      ++NumSmallTypeMerged;
    } else {
      ++NumPairCreated;
      if (isUnscaledLdSt(MI))
        ++NumUnscaledPairCreated;
    }

    // Merge the loads into a pair. Keeping the iterator straight is a
    // pain, so we let the merge routine tell us what the next instruction
    // is after it's done mucking about.
    MBBI = mergePairedInsns(MBBI, Paired, Flags);
    return true;
  }
  return false;
}

bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                        bool enableNarrowLdOpt) {
  bool Modified = false;
  // Three transformations to do here:
  // 1) Find halfword loads that can be merged into a single 32-bit word load
  //    with bitfield extract instructions.
  //      e.g.,
  //        ldrh w0, [x2]
  //        ldrh w1, [x2, #2]
  //        ; becomes
  //        ldr w0, [x2]
  //        ubfx w1, w0, #16, #16
  //        and w0, w0, #ffff
  // 2) Find loads and stores that can be merged into a single load or store
  //    pair instruction.
  //      e.g.,
  //        ldr x0, [x2]
  //        ldr x1, [x2, #8]
  //        ; becomes
  //        ldp x0, x1, [x2]
  // 3) Find base register updates that can be merged into the load or store
  //    as a base-reg writeback.
  //      e.g.,
  //        ldr x0, [x2]
  //        add x2, x2, #4
  //        ; becomes
  //        ldr x0, [x2], #4

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       enableNarrowLdOpt && MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::LDRHHui:
    // Unscaled instructions.
    case AArch64::LDURHHi: {
      if (tryToMergeLdStInst(MBBI)) {
        Modified = true;
        break;
      }
      ++MBBI;
      break;
    }
    // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    switch (MI->getOpcode()) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRSWui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    case AArch64::LDURSWi: {
      if (tryToMergeLdStInst(MBBI)) {
        Modified = true;
        break;
      }
      ++MBBI;
      break;
    }
    // FIXME: Do the other instructions.
    }
  }

  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
       MBBI != E;) {
    MachineInstr *MI = MBBI;
    // Do update merging. It's simpler to keep this separate from the above
    // switch, though not strictly necessary.
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
    default:
      // Just move on to the next instruction.
      ++MBBI;
      break;
    // Scaled instructions.
    case AArch64::STRSui:
    case AArch64::STRDui:
    case AArch64::STRQui:
    case AArch64::STRXui:
    case AArch64::STRWui:
    case AArch64::STRHHui:
    case AArch64::STRBBui:
    case AArch64::LDRSui:
    case AArch64::LDRDui:
    case AArch64::LDRQui:
    case AArch64::LDRXui:
    case AArch64::LDRWui:
    case AArch64::LDRHHui:
    case AArch64::LDRBBui:
    // Unscaled instructions.
    case AArch64::STURSi:
    case AArch64::STURDi:
    case AArch64::STURQi:
    case AArch64::STURWi:
    case AArch64::STURXi:
    case AArch64::LDURSi:
    case AArch64::LDURDi:
    case AArch64::LDURQi:
    case AArch64::LDURWi:
    case AArch64::LDURXi:
    // Paired instructions.
    case AArch64::LDPSi:
    case AArch64::LDPSWi:
    case AArch64::LDPDi:
    case AArch64::LDPQi:
    case AArch64::LDPWi:
    case AArch64::LDPXi:
    case AArch64::STPSi:
    case AArch64::STPDi:
    case AArch64::STPQi:
    case AArch64::STPWi:
    case AArch64::STPXi: {
      // Make sure this is a reg+imm (as opposed to an address reloc).
      if (!getLdStOffsetOp(MI).isImm()) {
        ++MBBI;
        break;
      }
      // Look forward to try to form a post-index instruction. For example,
      //   ldr x0, [x20]
      //   add x20, x20, #32
      // merged into:
      //   ldr x0, [x20], #32
      MachineBasicBlock::iterator Update =
          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
        Modified = true;
        ++NumPostFolded;
        break;
      }
      // Don't know how to handle pre/post-index versions, so move to the next
      // instruction.
      if (isUnscaledLdSt(Opc)) {
        ++MBBI;
        break;
      }

      // Look back to try to find a pre-index instruction. For example,
      //   add x0, x0, #8
      //   ldr x1, [x0]
      // merged into:
      //   ldr x1, [x0, #8]!
      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }
      // The immediate in the load/store is scaled by the size of the memory
      // operation. The immediate in the add we're looking for, however, is
      // not, so adjust here.
      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);

      // Look forward to try to find a pre-index instruction. For example,
      //   ldr x1, [x0, #64]
      //   add x0, x0, #64
      // merged into:
      //   ldr x1, [x0, #64]!
      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset);
      if (Update != E) {
        // Merge the update into the ld/st.
        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
        Modified = true;
        ++NumPreFolded;
        break;
      }

      // Nothing found. Just move to the next instruction.
      ++MBBI;
      break;
    }
    // FIXME: Do the other instructions.
    }
  }

  return Modified;
}

bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
  const AArch64Subtarget *SubTarget =
      &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
  bool ProfitableArch = SubTarget->isCortexA57();
  // FIXME: The benefit from converting narrow loads into a wider load could be
  // microarchitectural as it assumes that a single load with two bitfield
  // extracts is cheaper than two narrow loads. Currently, this conversion is
  // enabled only on Cortex-A57, where the performance benefits were verified.
  return ProfitableArch && !SubTarget->requiresStrictAlign();
}

bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
  TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
  TRI = Subtarget->getRegisterInfo();

  bool Modified = false;
  bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
  for (auto &MBB : Fn)
    Modified |= optimizeBlock(MBB, enableNarrowLdOpt);

  return Modified;
}

// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?

/// createAArch64LoadStoreOptimizationPass - returns an instance of the
/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
  return new AArch64LoadStoreOpt();
}