blob: c3842785f2be28fde32494f68e50594faded7184 [file] [log] [blame]
//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
16
Tim Northover3b0846e2014-05-24 12:50:23 +000017#include "AArch64InstrInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +000018#include "AArch64Subtarget.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000019#include "MCTargetDesc/AArch64AddressingModes.h"
Chad Rosier862a4122017-03-27 15:52:38 +000020#include "Utils/AArch64BaseInfo.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000021#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Triple.h"
Tim Northovercdf15292016-04-14 17:03:29 +000023#include "llvm/CodeGen/LivePhysRegs.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000024#include "llvm/CodeGen/MachineBasicBlock.h"
25#include "llvm/CodeGen/MachineFunction.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000026#include "llvm/CodeGen/MachineFunctionPass.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000027#include "llvm/CodeGen/MachineInstr.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000028#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000029#include "llvm/CodeGen/MachineOperand.h"
David Blaikieb3bde2e2017-11-17 01:07:10 +000030#include "llvm/CodeGen/TargetSubtargetInfo.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000031#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
34#include "llvm/Support/CodeGen.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000035#include "llvm/Support/MathExtras.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000036#include "llvm/Target/TargetMachine.h"
Eugene Zelenko96d933d2017-07-25 23:51:02 +000037#include <cassert>
38#include <cstdint>
39#include <iterator>
40#include <limits>
41#include <utility>
42
Tim Northover3b0846e2014-05-24 12:50:23 +000043using namespace llvm;
44
Chad Rosier9378c162015-08-05 14:22:53 +000045#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46
Tim Northover3b0846e2014-05-24 12:50:23 +000047namespace {
Eugene Zelenko96d933d2017-07-25 23:51:02 +000048
Tim Northover3b0846e2014-05-24 12:50:23 +000049class AArch64ExpandPseudo : public MachineFunctionPass {
50public:
Eugene Zelenko96d933d2017-07-25 23:51:02 +000051 const AArch64InstrInfo *TII;
52
Tim Northover3b0846e2014-05-24 12:50:23 +000053 static char ID;
Eugene Zelenko96d933d2017-07-25 23:51:02 +000054
Chad Rosier9378c162015-08-05 14:22:53 +000055 AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57 }
Tim Northover3b0846e2014-05-24 12:50:23 +000058
Tim Northover3b0846e2014-05-24 12:50:23 +000059 bool runOnMachineFunction(MachineFunction &Fn) override;
60
Mehdi Amini117296c2016-10-01 02:56:57 +000061 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
Tim Northover3b0846e2014-05-24 12:50:23 +000062
63private:
64 bool expandMBB(MachineBasicBlock &MBB);
Tim Northovercdf15292016-04-14 17:03:29 +000065 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66 MachineBasicBlock::iterator &NextMBBI);
Tim Northover3b0846e2014-05-24 12:50:23 +000067 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68 unsigned BitSize);
Tim Northovercdf15292016-04-14 17:03:29 +000069
70 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
71 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
72 unsigned ExtendImm, unsigned ZeroReg,
73 MachineBasicBlock::iterator &NextMBBI);
74 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
75 MachineBasicBlock::iterator MBBI,
76 MachineBasicBlock::iterator &NextMBBI);
Tim Northover3b0846e2014-05-24 12:50:23 +000077};
Eugene Zelenko96d933d2017-07-25 23:51:02 +000078
79} // end anonymous namespace
80
Tim Northover3b0846e2014-05-24 12:50:23 +000081char AArch64ExpandPseudo::ID = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +000082
Chad Rosier9378c162015-08-05 14:22:53 +000083INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
84 AARCH64_EXPAND_PSEUDO_NAME, false, false)
85
Tim Northover3b0846e2014-05-24 12:50:23 +000086/// \brief Transfer implicit operands on the pseudo instruction to the
87/// instructions created from the expansion.
88static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
89 MachineInstrBuilder &DefMI) {
90 const MCInstrDesc &Desc = OldMI.getDesc();
91 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
92 ++i) {
93 const MachineOperand &MO = OldMI.getOperand(i);
94 assert(MO.isReg() && MO.getReg());
95 if (MO.isUse())
Diana Picus116bbab2017-01-13 09:58:52 +000096 UseMI.add(MO);
Tim Northover3b0846e2014-05-24 12:50:23 +000097 else
Diana Picus116bbab2017-01-13 09:58:52 +000098 DefMI.add(MO);
Tim Northover3b0846e2014-05-24 12:50:23 +000099 }
100}
101
/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
///
/// \param Imm      The 64-bit value to read from.
/// \param ChunkIdx Chunk number; 0 selects bits 15:0, 3 selects bits 63:48.
/// \returns The selected chunk in the low 16 bits of the result.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}

/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
///
/// \param Imm     The 64-bit value to operate on.
/// \param FromIdx Index (0-3) of the chunk to copy.
/// \param ToIdx   Index (0-3) of the chunk to overwrite.
/// \returns \p Imm with chunk \p ToIdx replaced by chunk \p FromIdx.
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Move the source chunk to the destination position.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk. The mask is built from an unsigned literal so
  // that shifting it up to bit 63 (ToIdx == 3) never overflows a signed type.
  Imm &= ~(UINT64_C(0xFFFF) << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}
123
124/// \brief Helper function which tries to materialize a 64-bit value with an
125/// ORR + MOVK instruction sequence.
126static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
127 MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator &MBBI,
129 const AArch64InstrInfo *TII, unsigned ChunkIdx) {
130 assert(ChunkIdx < 4 && "Out of range chunk index specified!");
131 const unsigned ShiftAmt = ChunkIdx * 16;
132
133 uint64_t Encoding;
134 if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
135 // Create the ORR-immediate instruction.
136 MachineInstrBuilder MIB =
137 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
Diana Picus116bbab2017-01-13 09:58:52 +0000138 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000139 .addReg(AArch64::XZR)
140 .addImm(Encoding);
141
142 // Create the MOVK instruction.
143 const unsigned Imm16 = getChunk(UImm, ChunkIdx);
144 const unsigned DstReg = MI.getOperand(0).getReg();
145 const bool DstIsDead = MI.getOperand(0).isDead();
146 MachineInstrBuilder MIB1 =
147 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
148 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
149 .addReg(DstReg)
150 .addImm(Imm16)
151 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
152
153 transferImpOps(MI, MIB, MIB1);
154 MI.eraseFromParent();
155 return true;
156 }
157
158 return false;
159}
160
161/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
162/// can be materialized with an ORR instruction.
163static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
164 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
165
166 return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
167}
168
169/// \brief Check for identical 16-bit chunks within the constant and if so
170/// materialize them with a single ORR instruction. The remaining one or two
171/// 16-bit chunks will be materialized with MOVK instructions.
172///
173/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
174/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
175/// an ORR instruction.
Tim Northover3b0846e2014-05-24 12:50:23 +0000176static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
177 MachineBasicBlock &MBB,
178 MachineBasicBlock::iterator &MBBI,
179 const AArch64InstrInfo *TII) {
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000180 using CountMap = DenseMap<uint64_t, unsigned>;
181
Tim Northover3b0846e2014-05-24 12:50:23 +0000182 CountMap Counts;
183
184 // Scan the constant and count how often every chunk occurs.
185 for (unsigned Idx = 0; Idx < 4; ++Idx)
186 ++Counts[getChunk(UImm, Idx)];
187
188 // Traverse the chunks to find one which occurs more than once.
189 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
190 Chunk != End; ++Chunk) {
191 const uint64_t ChunkVal = Chunk->first;
192 const unsigned Count = Chunk->second;
193
194 uint64_t Encoding = 0;
195
196 // We are looking for chunks which have two or three instances and can be
197 // materialized with an ORR instruction.
198 if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
199 continue;
200
201 const bool CountThree = Count == 3;
202 // Create the ORR-immediate instruction.
203 MachineInstrBuilder MIB =
204 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
Diana Picus116bbab2017-01-13 09:58:52 +0000205 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000206 .addReg(AArch64::XZR)
207 .addImm(Encoding);
208
209 const unsigned DstReg = MI.getOperand(0).getReg();
210 const bool DstIsDead = MI.getOperand(0).isDead();
211
212 unsigned ShiftAmt = 0;
213 uint64_t Imm16 = 0;
214 // Find the first chunk not materialized with the ORR instruction.
215 for (; ShiftAmt < 64; ShiftAmt += 16) {
216 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
217
218 if (Imm16 != ChunkVal)
219 break;
220 }
221
222 // Create the first MOVK instruction.
223 MachineInstrBuilder MIB1 =
224 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
225 .addReg(DstReg,
226 RegState::Define | getDeadRegState(DstIsDead && CountThree))
227 .addReg(DstReg)
228 .addImm(Imm16)
229 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
230
231 // In case we have three instances the whole constant is now materialized
232 // and we can exit.
233 if (CountThree) {
234 transferImpOps(MI, MIB, MIB1);
235 MI.eraseFromParent();
236 return true;
237 }
238
239 // Find the remaining chunk which needs to be materialized.
240 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
241 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
242
243 if (Imm16 != ChunkVal)
244 break;
245 }
246
247 // Create the second MOVK instruction.
248 MachineInstrBuilder MIB2 =
249 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
250 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
251 .addReg(DstReg)
252 .addImm(Imm16)
253 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
254
255 transferImpOps(MI, MIB, MIB2);
256 MI.eraseFromParent();
257 return true;
258 }
259
260 return false;
261}
262
263/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
264/// starts a contiguous sequence of ones if we look at the bits from the LSB
265/// towards the MSB.
266static bool isStartChunk(uint64_t Chunk) {
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000267 if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
Tim Northover3b0846e2014-05-24 12:50:23 +0000268 return false;
269
Benjamin Kramer5f6a9072015-02-12 15:35:40 +0000270 return isMask_64(~Chunk);
Tim Northover3b0846e2014-05-24 12:50:23 +0000271}
272
273/// \brief Check whether this chunk matches the pattern '0...1...' This pattern
274/// ends a contiguous sequence of ones if we look at the bits from the LSB
275/// towards the MSB.
276static bool isEndChunk(uint64_t Chunk) {
Eugene Zelenko96d933d2017-07-25 23:51:02 +0000277 if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
Tim Northover3b0846e2014-05-24 12:50:23 +0000278 return false;
279
Benjamin Kramer5f6a9072015-02-12 15:35:40 +0000280 return isMask_64(Chunk);
Tim Northover3b0846e2014-05-24 12:50:23 +0000281}
282
/// \brief Clear or set all bits in the chunk at the given index.
///
/// \param Imm   The 64-bit value to update.
/// \param Idx   Index (0-3) of the 16-bit chunk to modify.
/// \param Clear If true the chunk becomes 0x0000, otherwise 0xFFFF.
/// \returns The updated value; all other chunks are left untouched.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  // Shifting a 64-bit value by 64 or more would be undefined.
  assert(Idx < 4 && "Out of range chunk index specified!");

  const uint64_t ChunkMask = UINT64_C(0xFFFF) << (Idx * 16);

  return Clear ? (Imm & ~ChunkMask) : (Imm | ChunkMask);
}
296
297/// \brief Check whether the constant contains a sequence of contiguous ones,
298/// which might be interrupted by one or two chunks. If so, materialize the
299/// sequence of contiguous ones with an ORR instruction.
300/// Materialize the chunks which are either interrupting the sequence or outside
301/// of the sequence with a MOVK instruction.
302///
303/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
304/// which ends the sequence (0...1...). Then we are looking for constants which
305/// contain at least one S and E chunk.
306/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
307///
308/// We are also looking for constants like |S|A|B|E| where the contiguous
309/// sequence of ones wraps around the MSB into the LSB.
Tim Northover3b0846e2014-05-24 12:50:23 +0000310static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
311 MachineBasicBlock &MBB,
312 MachineBasicBlock::iterator &MBBI,
313 const AArch64InstrInfo *TII) {
314 const int NotSet = -1;
315 const uint64_t Mask = 0xFFFF;
316
317 int StartIdx = NotSet;
318 int EndIdx = NotSet;
319 // Try to find the chunks which start/end a contiguous sequence of ones.
320 for (int Idx = 0; Idx < 4; ++Idx) {
321 int64_t Chunk = getChunk(UImm, Idx);
322 // Sign extend the 16-bit chunk to 64-bit.
323 Chunk = (Chunk << 48) >> 48;
324
325 if (isStartChunk(Chunk))
326 StartIdx = Idx;
327 else if (isEndChunk(Chunk))
328 EndIdx = Idx;
329 }
330
331 // Early exit in case we can't find a start/end chunk.
332 if (StartIdx == NotSet || EndIdx == NotSet)
333 return false;
334
335 // Outside of the contiguous sequence of ones everything needs to be zero.
336 uint64_t Outside = 0;
337 // Chunks between the start and end chunk need to have all their bits set.
338 uint64_t Inside = Mask;
339
340 // If our contiguous sequence of ones wraps around from the MSB into the LSB,
341 // just swap indices and pretend we are materializing a contiguous sequence
342 // of zeros surrounded by a contiguous sequence of ones.
343 if (StartIdx > EndIdx) {
344 std::swap(StartIdx, EndIdx);
345 std::swap(Outside, Inside);
346 }
347
348 uint64_t OrrImm = UImm;
349 int FirstMovkIdx = NotSet;
350 int SecondMovkIdx = NotSet;
351
352 // Find out which chunks we need to patch up to obtain a contiguous sequence
353 // of ones.
354 for (int Idx = 0; Idx < 4; ++Idx) {
355 const uint64_t Chunk = getChunk(UImm, Idx);
356
357 // Check whether we are looking at a chunk which is not part of the
358 // contiguous sequence of ones.
359 if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
360 OrrImm = updateImm(OrrImm, Idx, Outside == 0);
361
362 // Remember the index we need to patch.
363 if (FirstMovkIdx == NotSet)
364 FirstMovkIdx = Idx;
365 else
366 SecondMovkIdx = Idx;
367
368 // Check whether we are looking a chunk which is part of the contiguous
369 // sequence of ones.
370 } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
371 OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
372
373 // Remember the index we need to patch.
374 if (FirstMovkIdx == NotSet)
375 FirstMovkIdx = Idx;
376 else
377 SecondMovkIdx = Idx;
378 }
379 }
380 assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
381
382 // Create the ORR-immediate instruction.
383 uint64_t Encoding = 0;
384 AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
385 MachineInstrBuilder MIB =
386 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
Diana Picus116bbab2017-01-13 09:58:52 +0000387 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000388 .addReg(AArch64::XZR)
389 .addImm(Encoding);
390
391 const unsigned DstReg = MI.getOperand(0).getReg();
392 const bool DstIsDead = MI.getOperand(0).isDead();
393
394 const bool SingleMovk = SecondMovkIdx == NotSet;
395 // Create the first MOVK instruction.
396 MachineInstrBuilder MIB1 =
397 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
398 .addReg(DstReg,
399 RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
400 .addReg(DstReg)
401 .addImm(getChunk(UImm, FirstMovkIdx))
402 .addImm(
403 AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
404
405 // Early exit in case we only need to emit a single MOVK instruction.
406 if (SingleMovk) {
407 transferImpOps(MI, MIB, MIB1);
408 MI.eraseFromParent();
409 return true;
410 }
411
412 // Create the second MOVK instruction.
413 MachineInstrBuilder MIB2 =
414 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
415 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
416 .addReg(DstReg)
417 .addImm(getChunk(UImm, SecondMovkIdx))
418 .addImm(
419 AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
420
421 transferImpOps(MI, MIB, MIB2);
422 MI.eraseFromParent();
423 return true;
424}
425
426/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
427/// real move-immediate instructions to synthesize the immediate.
428bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
429 MachineBasicBlock::iterator MBBI,
430 unsigned BitSize) {
431 MachineInstr &MI = *MBBI;
Tim Northover5dad9df2016-04-01 23:14:52 +0000432 unsigned DstReg = MI.getOperand(0).getReg();
Tim Northover3b0846e2014-05-24 12:50:23 +0000433 uint64_t Imm = MI.getOperand(1).getImm();
434 const unsigned Mask = 0xFFFF;
435
Tim Northover5dad9df2016-04-01 23:14:52 +0000436 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
437 // Useless def, and we don't want to risk creating an invalid ORR (which
438 // would really write to sp).
439 MI.eraseFromParent();
440 return true;
441 }
442
Tim Northover3b0846e2014-05-24 12:50:23 +0000443 // Try a MOVI instruction (aka ORR-immediate with the zero register).
444 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
445 uint64_t Encoding;
446 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
447 unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
448 MachineInstrBuilder MIB =
449 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
Diana Picus116bbab2017-01-13 09:58:52 +0000450 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000451 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
452 .addImm(Encoding);
453 transferImpOps(MI, MIB, MIB);
454 MI.eraseFromParent();
455 return true;
456 }
457
458 // Scan the immediate and count the number of 16-bit chunks which are either
459 // all ones or all zeros.
460 unsigned OneChunks = 0;
461 unsigned ZeroChunks = 0;
462 for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
463 const unsigned Chunk = (Imm >> Shift) & Mask;
464 if (Chunk == Mask)
465 OneChunks++;
466 else if (Chunk == 0)
467 ZeroChunks++;
468 }
469
470 // Since we can't materialize the constant with a single ORR instruction,
471 // let's see whether we can materialize 3/4 of the constant with an ORR
472 // instruction and use an additional MOVK instruction to materialize the
473 // remaining 1/4.
474 //
475 // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
476 //
477 // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
478 // we would create the following instruction sequence:
479 //
480 // ORR x0, xzr, |A|X|A|X|
481 // MOVK x0, |B|, LSL #16
482 //
483 // Only look at 64-bit constants which can't be materialized with a single
484 // instruction e.g. which have less than either three all zero or all one
485 // chunks.
486 //
487 // Ignore 32-bit constants here, they always can be materialized with a
488 // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
489 // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
490 // Thus we fall back to the default code below which in the best case creates
491 // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
492 //
493 if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
494 // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
495 // identical?
496 if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
497 // See if we can come up with a constant which can be materialized with
498 // ORR-immediate by replicating element 3 into element 1.
499 uint64_t OrrImm = replicateChunk(UImm, 3, 1);
500 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
501 return true;
502
503 // See if we can come up with a constant which can be materialized with
504 // ORR-immediate by replicating element 1 into element 3.
505 OrrImm = replicateChunk(UImm, 1, 3);
506 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
507 return true;
508
509 // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
510 // identical?
511 } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
512 // See if we can come up with a constant which can be materialized with
513 // ORR-immediate by replicating element 2 into element 0.
514 uint64_t OrrImm = replicateChunk(UImm, 2, 0);
515 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
516 return true;
517
518 // See if we can come up with a constant which can be materialized with
519 // ORR-immediate by replicating element 1 into element 3.
520 OrrImm = replicateChunk(UImm, 0, 2);
521 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
522 return true;
523 }
524 }
525
526 // Check for identical 16-bit chunks within the constant and if so materialize
527 // them with a single ORR instruction. The remaining one or two 16-bit chunks
528 // will be materialized with MOVK instructions.
529 if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
530 return true;
531
532 // Check whether the constant contains a sequence of contiguous ones, which
533 // might be interrupted by one or two chunks. If so, materialize the sequence
534 // of contiguous ones with an ORR instruction. Materialize the chunks which
535 // are either interrupting the sequence or outside of the sequence with a
536 // MOVK instruction.
537 if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
538 return true;
539
540 // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
541 // more MOVK instructions to insert additional 16-bit portions into the
542 // lower bits.
543 bool isNeg = false;
544
545 // Use MOVN to materialize the high bits if we have more all one chunks
546 // than all zero chunks.
547 if (OneChunks > ZeroChunks) {
548 isNeg = true;
549 Imm = ~Imm;
550 }
551
552 unsigned FirstOpc;
553 if (BitSize == 32) {
554 Imm &= (1LL << 32) - 1;
555 FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
556 } else {
557 FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
558 }
559 unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
560 unsigned LastShift = 0; // LSL amount for last MOVK
561 if (Imm != 0) {
562 unsigned LZ = countLeadingZeros(Imm);
563 unsigned TZ = countTrailingZeros(Imm);
Evandro Menezes7960b2e2017-01-18 18:57:08 +0000564 Shift = (TZ / 16) * 16;
565 LastShift = ((63 - LZ) / 16) * 16;
Tim Northover3b0846e2014-05-24 12:50:23 +0000566 }
567 unsigned Imm16 = (Imm >> Shift) & Mask;
Tim Northover3b0846e2014-05-24 12:50:23 +0000568 bool DstIsDead = MI.getOperand(0).isDead();
569 MachineInstrBuilder MIB1 =
570 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
571 .addReg(DstReg, RegState::Define |
Evandro Menezes7960b2e2017-01-18 18:57:08 +0000572 getDeadRegState(DstIsDead && Shift == LastShift))
Tim Northover3b0846e2014-05-24 12:50:23 +0000573 .addImm(Imm16)
574 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
575
576 // If a MOVN was used for the high bits of a negative value, flip the rest
577 // of the bits back for use with MOVK.
578 if (isNeg)
579 Imm = ~Imm;
580
581 if (Shift == LastShift) {
582 transferImpOps(MI, MIB1, MIB1);
583 MI.eraseFromParent();
584 return true;
585 }
586
587 MachineInstrBuilder MIB2;
588 unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
Evandro Menezes7960b2e2017-01-18 18:57:08 +0000589 while (Shift < LastShift) {
590 Shift += 16;
Tim Northover3b0846e2014-05-24 12:50:23 +0000591 Imm16 = (Imm >> Shift) & Mask;
592 if (Imm16 == (isNeg ? Mask : 0))
593 continue; // This 16-bit portion is already set correctly.
594 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
595 .addReg(DstReg,
596 RegState::Define |
Evandro Menezes7960b2e2017-01-18 18:57:08 +0000597 getDeadRegState(DstIsDead && Shift == LastShift))
Tim Northover3b0846e2014-05-24 12:50:23 +0000598 .addReg(DstReg)
599 .addImm(Imm16)
600 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
601 }
602
603 transferImpOps(MI, MIB1, MIB2);
604 MI.eraseFromParent();
605 return true;
606}
607
Tim Northovercdf15292016-04-14 17:03:29 +0000608bool AArch64ExpandPseudo::expandCMP_SWAP(
609 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
610 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
611 MachineBasicBlock::iterator &NextMBBI) {
612 MachineInstr &MI = *MBBI;
613 DebugLoc DL = MI.getDebugLoc();
Matthias Braunb4f74222017-05-26 23:48:59 +0000614 const MachineOperand &Dest = MI.getOperand(0);
Tim Northovercdf15292016-04-14 17:03:29 +0000615 unsigned StatusReg = MI.getOperand(1).getReg();
Matthias Braunb4f74222017-05-26 23:48:59 +0000616 bool StatusDead = MI.getOperand(1).isDead();
617 // Duplicating undef operands into 2 instructions does not guarantee the same
618 // value on both; However undef should be replaced by xzr anyway.
619 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
620 unsigned AddrReg = MI.getOperand(2).getReg();
621 unsigned DesiredReg = MI.getOperand(3).getReg();
622 unsigned NewReg = MI.getOperand(4).getReg();
Tim Northovercdf15292016-04-14 17:03:29 +0000623
624 MachineFunction *MF = MBB.getParent();
625 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
626 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
627 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
628
629 MF->insert(++MBB.getIterator(), LoadCmpBB);
630 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
631 MF->insert(++StoreBB->getIterator(), DoneBB);
632
633 // .Lloadcmp:
Matthias Braunb4f74222017-05-26 23:48:59 +0000634 // mov wStatus, 0
Tim Northovercdf15292016-04-14 17:03:29 +0000635 // ldaxr xDest, [xAddr]
636 // cmp xDest, xDesired
637 // b.ne .Ldone
Matthias Braunb4f74222017-05-26 23:48:59 +0000638 if (!StatusDead)
639 BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
640 .addImm(0).addImm(0);
Tim Northovercdf15292016-04-14 17:03:29 +0000641 BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
Matthias Braunb4f74222017-05-26 23:48:59 +0000642 .addReg(AddrReg);
Tim Northovercdf15292016-04-14 17:03:29 +0000643 BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
644 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
Matthias Braunb4f74222017-05-26 23:48:59 +0000645 .addReg(DesiredReg)
Tim Northovercdf15292016-04-14 17:03:29 +0000646 .addImm(ExtendImm);
647 BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
648 .addImm(AArch64CC::NE)
649 .addMBB(DoneBB)
650 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
651 LoadCmpBB->addSuccessor(DoneBB);
652 LoadCmpBB->addSuccessor(StoreBB);
653
654 // .Lstore:
655 // stlxr wStatus, xNew, [xAddr]
656 // cbnz wStatus, .Lloadcmp
Matthias Braunb4f74222017-05-26 23:48:59 +0000657 BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
658 .addReg(NewReg)
659 .addReg(AddrReg);
Tim Northovercdf15292016-04-14 17:03:29 +0000660 BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
Matthias Braunb4f74222017-05-26 23:48:59 +0000661 .addReg(StatusReg, getKillRegState(StatusDead))
Tim Northovercdf15292016-04-14 17:03:29 +0000662 .addMBB(LoadCmpBB);
663 StoreBB->addSuccessor(LoadCmpBB);
664 StoreBB->addSuccessor(DoneBB);
665
666 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
667 DoneBB->transferSuccessors(&MBB);
Tim Northovercdf15292016-04-14 17:03:29 +0000668
Ahmed Bougacha9e714252016-04-27 20:33:02 +0000669 MBB.addSuccessor(LoadCmpBB);
670
Tim Northovercdf15292016-04-14 17:03:29 +0000671 NextMBBI = MBB.end();
672 MI.eraseFromParent();
Matthias Braunb4f74222017-05-26 23:48:59 +0000673
674 // Recompute livein lists.
Matthias Braunb4f74222017-05-26 23:48:59 +0000675 LivePhysRegs LiveRegs;
Matthias Braunc9056b82017-09-06 20:45:24 +0000676 computeAndAddLiveIns(LiveRegs, *DoneBB);
677 computeAndAddLiveIns(LiveRegs, *StoreBB);
678 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000679 // Do an extra pass around the loop to get loop carried registers right.
680 StoreBB->clearLiveIns();
Matthias Braunc9056b82017-09-06 20:45:24 +0000681 computeAndAddLiveIns(LiveRegs, *StoreBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000682 LoadCmpBB->clearLiveIns();
Matthias Braunc9056b82017-09-06 20:45:24 +0000683 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000684
Tim Northovercdf15292016-04-14 17:03:29 +0000685 return true;
686}
687
688bool AArch64ExpandPseudo::expandCMP_SWAP_128(
689 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
690 MachineBasicBlock::iterator &NextMBBI) {
Tim Northovercdf15292016-04-14 17:03:29 +0000691 MachineInstr &MI = *MBBI;
692 DebugLoc DL = MI.getDebugLoc();
693 MachineOperand &DestLo = MI.getOperand(0);
694 MachineOperand &DestHi = MI.getOperand(1);
695 unsigned StatusReg = MI.getOperand(2).getReg();
Matthias Braunb4f74222017-05-26 23:48:59 +0000696 bool StatusDead = MI.getOperand(2).isDead();
697 // Duplicating undef operands into 2 instructions does not guarantee the same
698 // value on both; However undef should be replaced by xzr anyway.
699 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
700 unsigned AddrReg = MI.getOperand(3).getReg();
701 unsigned DesiredLoReg = MI.getOperand(4).getReg();
702 unsigned DesiredHiReg = MI.getOperand(5).getReg();
703 unsigned NewLoReg = MI.getOperand(6).getReg();
704 unsigned NewHiReg = MI.getOperand(7).getReg();
Tim Northovercdf15292016-04-14 17:03:29 +0000705
706 MachineFunction *MF = MBB.getParent();
707 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
708 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
709 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
710
711 MF->insert(++MBB.getIterator(), LoadCmpBB);
712 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
713 MF->insert(++StoreBB->getIterator(), DoneBB);
714
715 // .Lloadcmp:
716 // ldaxp xDestLo, xDestHi, [xAddr]
717 // cmp xDestLo, xDesiredLo
718 // sbcs xDestHi, xDesiredHi
719 // b.ne .Ldone
Tim Northovercdf15292016-04-14 17:03:29 +0000720 BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
721 .addReg(DestLo.getReg(), RegState::Define)
722 .addReg(DestHi.getReg(), RegState::Define)
Matthias Braunb4f74222017-05-26 23:48:59 +0000723 .addReg(AddrReg);
Tim Northovercdf15292016-04-14 17:03:29 +0000724 BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
725 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
Matthias Braunb4f74222017-05-26 23:48:59 +0000726 .addReg(DesiredLoReg)
Tim Northovercdf15292016-04-14 17:03:29 +0000727 .addImm(0);
Tim Northover5bb87b62016-12-01 21:31:59 +0000728 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
729 .addUse(AArch64::WZR)
730 .addUse(AArch64::WZR)
731 .addImm(AArch64CC::EQ);
732 BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
Tim Northovercdf15292016-04-14 17:03:29 +0000733 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
Matthias Braunb4f74222017-05-26 23:48:59 +0000734 .addReg(DesiredHiReg)
Tim Northover5bb87b62016-12-01 21:31:59 +0000735 .addImm(0);
736 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
737 .addUse(StatusReg, RegState::Kill)
738 .addUse(StatusReg, RegState::Kill)
739 .addImm(AArch64CC::EQ);
740 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
Matthias Braunb4f74222017-05-26 23:48:59 +0000741 .addUse(StatusReg, getKillRegState(StatusDead))
Tim Northover5bb87b62016-12-01 21:31:59 +0000742 .addMBB(DoneBB);
Tim Northovercdf15292016-04-14 17:03:29 +0000743 LoadCmpBB->addSuccessor(DoneBB);
744 LoadCmpBB->addSuccessor(StoreBB);
745
746 // .Lstore:
747 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
748 // cbnz wStatus, .Lloadcmp
Tim Northovercdf15292016-04-14 17:03:29 +0000749 BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
Matthias Braunb4f74222017-05-26 23:48:59 +0000750 .addReg(NewLoReg)
751 .addReg(NewHiReg)
752 .addReg(AddrReg);
Tim Northovercdf15292016-04-14 17:03:29 +0000753 BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
Matthias Braunb4f74222017-05-26 23:48:59 +0000754 .addReg(StatusReg, getKillRegState(StatusDead))
Tim Northovercdf15292016-04-14 17:03:29 +0000755 .addMBB(LoadCmpBB);
756 StoreBB->addSuccessor(LoadCmpBB);
757 StoreBB->addSuccessor(DoneBB);
758
759 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
760 DoneBB->transferSuccessors(&MBB);
Tim Northovercdf15292016-04-14 17:03:29 +0000761
Ahmed Bougacha9e714252016-04-27 20:33:02 +0000762 MBB.addSuccessor(LoadCmpBB);
763
Tim Northovercdf15292016-04-14 17:03:29 +0000764 NextMBBI = MBB.end();
765 MI.eraseFromParent();
Matthias Braunb4f74222017-05-26 23:48:59 +0000766
767 // Recompute liveness bottom up.
Matthias Braunb4f74222017-05-26 23:48:59 +0000768 LivePhysRegs LiveRegs;
Matthias Braunc9056b82017-09-06 20:45:24 +0000769 computeAndAddLiveIns(LiveRegs, *DoneBB);
770 computeAndAddLiveIns(LiveRegs, *StoreBB);
771 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000772 // Do an extra pass in the loop to get the loop carried dependencies right.
773 StoreBB->clearLiveIns();
Matthias Braunc9056b82017-09-06 20:45:24 +0000774 computeAndAddLiveIns(LiveRegs, *StoreBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000775 LoadCmpBB->clearLiveIns();
Matthias Braunc9056b82017-09-06 20:45:24 +0000776 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
Matthias Braunb4f74222017-05-26 23:48:59 +0000777
Tim Northovercdf15292016-04-14 17:03:29 +0000778 return true;
779}
780
Tim Northover3b0846e2014-05-24 12:50:23 +0000781/// \brief If MBBI references a pseudo instruction that should be expanded here,
782/// do the expansion and return true. Otherwise return false.
783bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
Tim Northovercdf15292016-04-14 17:03:29 +0000784 MachineBasicBlock::iterator MBBI,
785 MachineBasicBlock::iterator &NextMBBI) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000786 MachineInstr &MI = *MBBI;
787 unsigned Opcode = MI.getOpcode();
788 switch (Opcode) {
789 default:
790 break;
791
792 case AArch64::ADDWrr:
793 case AArch64::SUBWrr:
794 case AArch64::ADDXrr:
795 case AArch64::SUBXrr:
796 case AArch64::ADDSWrr:
797 case AArch64::SUBSWrr:
798 case AArch64::ADDSXrr:
799 case AArch64::SUBSXrr:
800 case AArch64::ANDWrr:
801 case AArch64::ANDXrr:
802 case AArch64::BICWrr:
803 case AArch64::BICXrr:
804 case AArch64::ANDSWrr:
805 case AArch64::ANDSXrr:
806 case AArch64::BICSWrr:
807 case AArch64::BICSXrr:
808 case AArch64::EONWrr:
809 case AArch64::EONXrr:
810 case AArch64::EORWrr:
811 case AArch64::EORXrr:
812 case AArch64::ORNWrr:
813 case AArch64::ORNXrr:
814 case AArch64::ORRWrr:
815 case AArch64::ORRXrr: {
816 unsigned Opcode;
817 switch (MI.getOpcode()) {
818 default:
819 return false;
820 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
821 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
822 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
823 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
824 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
825 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
826 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
827 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
828 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
829 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
830 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
831 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
832 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
833 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
834 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
835 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
836 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
837 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
838 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
839 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
840 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
841 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
842 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
843 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
844 }
845 MachineInstrBuilder MIB1 =
846 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
847 MI.getOperand(0).getReg())
Diana Picus116bbab2017-01-13 09:58:52 +0000848 .add(MI.getOperand(1))
849 .add(MI.getOperand(2))
Tim Northover3b0846e2014-05-24 12:50:23 +0000850 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
851 transferImpOps(MI, MIB1, MIB1);
852 MI.eraseFromParent();
853 return true;
854 }
855
Tim Northover3b0846e2014-05-24 12:50:23 +0000856 case AArch64::LOADgot: {
857 // Expand into ADRP + LDR.
858 unsigned DstReg = MI.getOperand(0).getReg();
859 const MachineOperand &MO1 = MI.getOperand(1);
860 unsigned Flags = MO1.getTargetFlags();
861 MachineInstrBuilder MIB1 =
862 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
863 MachineInstrBuilder MIB2 =
864 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
Diana Picus116bbab2017-01-13 09:58:52 +0000865 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000866 .addReg(DstReg);
867
868 if (MO1.isGlobal()) {
869 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
870 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
871 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
872 } else if (MO1.isSymbol()) {
873 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
874 MIB2.addExternalSymbol(MO1.getSymbolName(),
875 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
876 } else {
877 assert(MO1.isCPI() &&
878 "Only expect globals, externalsymbols, or constant pools");
879 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
880 Flags | AArch64II::MO_PAGE);
881 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
882 Flags | AArch64II::MO_PAGEOFF |
883 AArch64II::MO_NC);
884 }
885
886 transferImpOps(MI, MIB1, MIB2);
887 MI.eraseFromParent();
888 return true;
889 }
890
891 case AArch64::MOVaddr:
892 case AArch64::MOVaddrJT:
893 case AArch64::MOVaddrCP:
894 case AArch64::MOVaddrBA:
895 case AArch64::MOVaddrTLS:
896 case AArch64::MOVaddrEXT: {
897 // Expand into ADRP + ADD.
898 unsigned DstReg = MI.getOperand(0).getReg();
899 MachineInstrBuilder MIB1 =
900 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
Diana Picus116bbab2017-01-13 09:58:52 +0000901 .add(MI.getOperand(1));
Tim Northover3b0846e2014-05-24 12:50:23 +0000902
903 MachineInstrBuilder MIB2 =
904 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
Diana Picus116bbab2017-01-13 09:58:52 +0000905 .add(MI.getOperand(0))
Tim Northover3b0846e2014-05-24 12:50:23 +0000906 .addReg(DstReg)
Diana Picus116bbab2017-01-13 09:58:52 +0000907 .add(MI.getOperand(2))
Tim Northover3b0846e2014-05-24 12:50:23 +0000908 .addImm(0);
909
910 transferImpOps(MI, MIB1, MIB2);
911 MI.eraseFromParent();
912 return true;
913 }
Chad Rosier862a4122017-03-27 15:52:38 +0000914 case AArch64::MOVbaseTLS: {
915 unsigned DstReg = MI.getOperand(0).getReg();
Petr Hosek9eb0a1e2017-04-04 19:51:53 +0000916 auto SysReg = AArch64SysReg::TPIDR_EL0;
917 MachineFunction *MF = MBB.getParent();
918 if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
919 MF->getTarget().getCodeModel() == CodeModel::Kernel)
920 SysReg = AArch64SysReg::TPIDR_EL1;
Chad Rosier862a4122017-03-27 15:52:38 +0000921 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
Petr Hosek9eb0a1e2017-04-04 19:51:53 +0000922 .addImm(SysReg);
Chad Rosier862a4122017-03-27 15:52:38 +0000923 MI.eraseFromParent();
924 return true;
925 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000926
927 case AArch64::MOVi32imm:
928 return expandMOVImm(MBB, MBBI, 32);
929 case AArch64::MOVi64imm:
930 return expandMOVImm(MBB, MBBI, 64);
Juergen Ributzka5fe5ef92015-03-30 22:45:56 +0000931 case AArch64::RET_ReallyLR: {
Matthias Braun76bb4132016-12-16 23:55:43 +0000932 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
933 // function and missing live-ins. We are fine in practice because callee
934 // saved register handling ensures the register value is restored before
935 // RET, but we need the undef flag here to appease the MachineVerifier
936 // liveness checks.
Juergen Ributzka5fe5ef92015-03-30 22:45:56 +0000937 MachineInstrBuilder MIB =
938 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
Matthias Braun76bb4132016-12-16 23:55:43 +0000939 .addReg(AArch64::LR, RegState::Undef);
Juergen Ributzka5fe5ef92015-03-30 22:45:56 +0000940 transferImpOps(MI, MIB, MIB);
Tim Northover3b0846e2014-05-24 12:50:23 +0000941 MI.eraseFromParent();
942 return true;
943 }
Tim Northovercdf15292016-04-14 17:03:29 +0000944 case AArch64::CMP_SWAP_8:
945 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
946 AArch64::SUBSWrx,
947 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
948 AArch64::WZR, NextMBBI);
949 case AArch64::CMP_SWAP_16:
950 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
951 AArch64::SUBSWrx,
952 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
953 AArch64::WZR, NextMBBI);
954 case AArch64::CMP_SWAP_32:
955 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
956 AArch64::SUBSWrs,
957 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
958 AArch64::WZR, NextMBBI);
959 case AArch64::CMP_SWAP_64:
960 return expandCMP_SWAP(MBB, MBBI,
961 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
962 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
963 AArch64::XZR, NextMBBI);
964 case AArch64::CMP_SWAP_128:
965 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
Tim Northover100b7f62017-04-20 21:57:45 +0000966
Florian Hahnf63a5e92017-07-29 20:35:28 +0000967 case AArch64::AESMCrrTied:
Tim Northover869fa742017-08-03 16:59:36 +0000968 case AArch64::AESIMCrrTied: {
969 MachineInstrBuilder MIB =
970 BuildMI(MBB, MBBI, MI.getDebugLoc(),
971 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
972 AArch64::AESIMCrr))
973 .add(MI.getOperand(0))
974 .add(MI.getOperand(1));
975 transferImpOps(MI, MIB, MIB);
976 MI.eraseFromParent();
Florian Hahnf63a5e92017-07-29 20:35:28 +0000977 return true;
Tim Northover869fa742017-08-03 16:59:36 +0000978 }
Juergen Ributzka5fe5ef92015-03-30 22:45:56 +0000979 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000980 return false;
981}
982
983/// \brief Iterate over the instructions in basic block MBB and expand any
984/// pseudo instructions. Return true if anything was modified.
985bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
986 bool Modified = false;
987
988 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
989 while (MBBI != E) {
990 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
Tim Northovercdf15292016-04-14 17:03:29 +0000991 Modified |= expandMI(MBB, MBBI, NMBBI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000992 MBBI = NMBBI;
993 }
994
995 return Modified;
996}
997
998bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
Eric Christopherfc6de422014-08-05 02:39:49 +0000999 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +00001000
1001 bool Modified = false;
1002 for (auto &MBB : MF)
1003 Modified |= expandMBB(MBB);
1004 return Modified;
1005}
1006
1007/// \brief Returns an instance of the pseudo instruction expansion pass.
1008FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1009 return new AArch64ExpandPseudo();
1010}