blob: f8710f8f6fef9ef75bc023ef5c18c5bb8910bb1c [file] [log] [blame]
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001//===--- HexagonBitSimplify.cpp -------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE "hexbit"
11
Mehdi Aminib550cb12016-04-18 09:17:29 +000012#include "HexagonBitTracker.h"
13#include "HexagonTargetMachine.h"
Krzysztof Parzyszekced99412015-10-20 22:57:13 +000014#include "llvm/CodeGen/MachineDominators.h"
15#include "llvm/CodeGen/MachineFunctionPass.h"
16#include "llvm/CodeGen/MachineInstrBuilder.h"
17#include "llvm/CodeGen/MachineRegisterInfo.h"
Mehdi Aminib550cb12016-04-18 09:17:29 +000018#include "llvm/CodeGen/Passes.h"
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +000019#include "llvm/Support/CommandLine.h"
Krzysztof Parzyszekced99412015-10-20 22:57:13 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/raw_ostream.h"
Krzysztof Parzyszekced99412015-10-20 22:57:13 +000022#include "llvm/Target/TargetInstrInfo.h"
Mehdi Aminib550cb12016-04-18 09:17:29 +000023#include "llvm/Target/TargetMachine.h"
Krzysztof Parzyszekced99412015-10-20 22:57:13 +000024
25using namespace llvm;
26
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +000027static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
28 cl::init(true), cl::desc("Preserve subregisters in tied operands"));
29
Krzysztof Parzyszekced99412015-10-20 22:57:13 +000030namespace llvm {
31 void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
32 FunctionPass *createHexagonBitSimplify();
33}
34
35namespace {
36 // Set of virtual registers, based on BitVector.
37 struct RegisterSet : private BitVector {
38 RegisterSet() : BitVector() {}
39 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
40 RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
41
42 using BitVector::clear;
43 using BitVector::count;
44
45 unsigned find_first() const {
46 int First = BitVector::find_first();
47 if (First < 0)
48 return 0;
49 return x2v(First);
50 }
51
52 unsigned find_next(unsigned Prev) const {
53 int Next = BitVector::find_next(v2x(Prev));
54 if (Next < 0)
55 return 0;
56 return x2v(Next);
57 }
58
59 RegisterSet &insert(unsigned R) {
60 unsigned Idx = v2x(R);
61 ensure(Idx);
62 return static_cast<RegisterSet&>(BitVector::set(Idx));
63 }
64 RegisterSet &remove(unsigned R) {
65 unsigned Idx = v2x(R);
66 if (Idx >= size())
67 return *this;
68 return static_cast<RegisterSet&>(BitVector::reset(Idx));
69 }
70
71 RegisterSet &insert(const RegisterSet &Rs) {
72 return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
73 }
74 RegisterSet &remove(const RegisterSet &Rs) {
75 return static_cast<RegisterSet&>(BitVector::reset(Rs));
76 }
77
78 reference operator[](unsigned R) {
79 unsigned Idx = v2x(R);
80 ensure(Idx);
81 return BitVector::operator[](Idx);
82 }
83 bool operator[](unsigned R) const {
84 unsigned Idx = v2x(R);
85 assert(Idx < size());
86 return BitVector::operator[](Idx);
87 }
88 bool has(unsigned R) const {
89 unsigned Idx = v2x(R);
90 if (Idx >= size())
91 return false;
92 return BitVector::test(Idx);
93 }
94
95 bool empty() const {
96 return !BitVector::any();
97 }
98 bool includes(const RegisterSet &Rs) const {
99 // A.BitVector::test(B) <=> A-B != {}
100 return !Rs.BitVector::test(*this);
101 }
102 bool intersects(const RegisterSet &Rs) const {
103 return BitVector::anyCommon(Rs);
104 }
105
106 private:
107 void ensure(unsigned Idx) {
108 if (size() <= Idx)
109 resize(std::max(Idx+1, 32U));
110 }
111 static inline unsigned v2x(unsigned v) {
112 return TargetRegisterInfo::virtReg2Index(v);
113 }
114 static inline unsigned x2v(unsigned x) {
115 return TargetRegisterInfo::index2VirtReg(x);
116 }
117 };
118
119
120 struct PrintRegSet {
121 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
122 : RS(S), TRI(RI) {}
123 friend raw_ostream &operator<< (raw_ostream &OS,
124 const PrintRegSet &P);
125 private:
126 const RegisterSet &RS;
127 const TargetRegisterInfo *TRI;
128 };
129
130 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
131 LLVM_ATTRIBUTE_UNUSED;
132 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
133 OS << '{';
134 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
135 OS << ' ' << PrintReg(R, P.TRI);
136 OS << " }";
137 return OS;
138 }
139}
140
141
142namespace {
143 class Transformation;
144
145 class HexagonBitSimplify : public MachineFunctionPass {
146 public:
147 static char ID;
148 HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
149 initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
150 }
Mehdi Amini117296c2016-10-01 02:56:57 +0000151 virtual StringRef getPassName() const {
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000152 return "Hexagon bit simplification";
153 }
154 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
155 AU.addRequired<MachineDominatorTree>();
156 AU.addPreserved<MachineDominatorTree>();
157 MachineFunctionPass::getAnalysisUsage(AU);
158 }
159 virtual bool runOnMachineFunction(MachineFunction &MF);
160
161 static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
162 static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
163 static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
164 const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000165 static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
166 uint16_t W);
167 static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
168 uint16_t W, uint64_t &U);
169 static bool replaceReg(unsigned OldR, unsigned NewR,
170 MachineRegisterInfo &MRI);
171 static bool getSubregMask(const BitTracker::RegisterRef &RR,
172 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
173 static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
174 unsigned NewSR, MachineRegisterInfo &MRI);
175 static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
176 unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
177 static bool parseRegSequence(const MachineInstr &I,
178 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
179
180 static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
181 uint16_t Begin);
182 static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
183 uint16_t Begin, const HexagonInstrInfo &HII);
184
185 static const TargetRegisterClass *getFinalVRegClass(
186 const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
187 static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
188 const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
189
190 private:
191 MachineDominatorTree *MDT;
192
193 bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +0000194 static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
195 unsigned NewSub = Hexagon::NoSubRegister);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000196 };
197
198 char HexagonBitSimplify::ID = 0;
199 typedef HexagonBitSimplify HBS;
200
201
202 // The purpose of this class is to provide a common facility to traverse
203 // the function top-down or bottom-up via the dominator tree, and keep
204 // track of the available registers.
205 class Transformation {
206 public:
207 bool TopDown;
208 Transformation(bool TD) : TopDown(TD) {}
209 virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
210 virtual ~Transformation() {}
211 };
212}
213
214INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
215 "Hexagon bit simplification", false, false)
216INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
217INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
218 "Hexagon bit simplification", false, false)
219
220
221bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
222 RegisterSet &AVs) {
223 MachineDomTreeNode *N = MDT->getNode(&B);
224 typedef GraphTraits<MachineDomTreeNode*> GTN;
225 bool Changed = false;
226
227 if (T.TopDown)
228 Changed = T.processBlock(B, AVs);
229
230 RegisterSet Defs;
231 for (auto &I : B)
232 getInstrDefs(I, Defs);
233 RegisterSet NewAVs = AVs;
234 NewAVs.insert(Defs);
235
236 for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
237 MachineBasicBlock *SB = (*I)->getBlock();
238 Changed |= visitBlock(*SB, T, NewAVs);
239 }
240 if (!T.TopDown)
241 Changed |= T.processBlock(B, AVs);
242
243 return Changed;
244}
245
246//
247// Utility functions:
248//
249void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
250 RegisterSet &Defs) {
251 for (auto &Op : MI.operands()) {
252 if (!Op.isReg() || !Op.isDef())
253 continue;
254 unsigned R = Op.getReg();
255 if (!TargetRegisterInfo::isVirtualRegister(R))
256 continue;
257 Defs.insert(R);
258 }
259}
260
261void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
262 RegisterSet &Uses) {
263 for (auto &Op : MI.operands()) {
264 if (!Op.isReg() || !Op.isUse())
265 continue;
266 unsigned R = Op.getReg();
267 if (!TargetRegisterInfo::isVirtualRegister(R))
268 continue;
269 Uses.insert(R);
270 }
271}
272
273// Check if all the bits in range [B, E) in both cells are equal.
274bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
275 uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
276 uint16_t W) {
277 for (uint16_t i = 0; i < W; ++i) {
278 // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
279 if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
280 return false;
281 // Same for RC2[i].
282 if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
283 return false;
284 if (RC1[B1+i] != RC2[B2+i])
285 return false;
286 }
287 return true;
288}
289
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000290bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
291 uint16_t B, uint16_t W) {
292 assert(B < RC.width() && B+W <= RC.width());
293 for (uint16_t i = B; i < B+W; ++i)
294 if (!RC[i].is(0))
295 return false;
296 return true;
297}
298
299
300bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
301 uint16_t B, uint16_t W, uint64_t &U) {
302 assert(B < RC.width() && B+W <= RC.width());
303 int64_t T = 0;
304 for (uint16_t i = B+W; i > B; --i) {
305 const BitTracker::BitValue &BV = RC[i-1];
306 T <<= 1;
307 if (BV.is(1))
308 T |= 1;
309 else if (!BV.is(0))
310 return false;
311 }
312 U = T;
313 return true;
314}
315
316
317bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
318 MachineRegisterInfo &MRI) {
319 if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
320 !TargetRegisterInfo::isVirtualRegister(NewR))
321 return false;
322 auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
323 decltype(End) NextI;
324 for (auto I = Begin; I != End; I = NextI) {
325 NextI = std::next(I);
326 I->setReg(NewR);
327 }
328 return Begin != End;
329}
330
331
332bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
333 unsigned NewSR, MachineRegisterInfo &MRI) {
334 if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
335 !TargetRegisterInfo::isVirtualRegister(NewR))
336 return false;
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +0000337 if (hasTiedUse(OldR, MRI, NewSR))
338 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000339 auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
340 decltype(End) NextI;
341 for (auto I = Begin; I != End; I = NextI) {
342 NextI = std::next(I);
343 I->setReg(NewR);
344 I->setSubReg(NewSR);
345 }
346 return Begin != End;
347}
348
349
350bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
351 unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
352 if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
353 !TargetRegisterInfo::isVirtualRegister(NewR))
354 return false;
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +0000355 if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
356 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000357 auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
358 decltype(End) NextI;
359 for (auto I = Begin; I != End; I = NextI) {
360 NextI = std::next(I);
361 if (I->getSubReg() != OldSR)
362 continue;
363 I->setReg(NewR);
364 I->setSubReg(NewSR);
365 }
366 return Begin != End;
367}
368
369
370// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
371// of Sub in Reg, and set Width to the size of Sub in bits. Return true,
372// if this succeeded, otherwise return false.
373bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
374 unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
375 const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +0000376 if (RR.Sub == 0) {
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000377 Begin = 0;
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +0000378 Width = RC->getSize()*8;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000379 return true;
380 }
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +0000381
382 assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
383 if (RR.Sub == Hexagon::subreg_loreg)
384 Begin = 0;
385
386 switch (RC->getID()) {
387 case Hexagon::DoubleRegsRegClassID:
388 case Hexagon::VecDblRegsRegClassID:
389 case Hexagon::VecDblRegs128BRegClassID:
390 Width = RC->getSize()*8 / 2;
391 if (RR.Sub == Hexagon::subreg_hireg)
392 Begin = Width;
393 break;
394 default:
395 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000396 }
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +0000397 return true;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000398}
399
400
401// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
402// subregister.
403bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
404 BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
405 assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
406 unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
407 assert(Sub1 != Sub2);
408 if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
409 SL = I.getOperand(1);
410 SH = I.getOperand(3);
411 return true;
412 }
413 if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
414 SH = I.getOperand(1);
415 SL = I.getOperand(3);
416 return true;
417 }
418 return false;
419}
420
421
422// All stores (except 64-bit stores) take a 32-bit register as the source
423// of the value to be stored. If the instruction stores into a location
424// that is shorter than 32 bits, some bits of the source register are not
425// used. For each store instruction, calculate the set of used bits in
426// the source register, and set appropriate bits in Bits. Return true if
427// the bits are calculated, false otherwise.
428bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
429 uint16_t Begin) {
430 using namespace Hexagon;
431
432 switch (Opc) {
433 // Store byte
434 case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
435 case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
436 case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
437 case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
438 case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
439 case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
440 case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
441 case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
442 case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
443 case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
444 case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
445 case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
446 case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
447 case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
448 case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
449 case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
450 case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
451 case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
452 case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
453 case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
454 case S4_storerb_ap: // memb(Re32=#U6)=Rt32
455 case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
456 case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
457 case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
458 case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
459 case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
460 case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
461 case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
462 case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
463 case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
464 case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
465 case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
466 case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
467 case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
468 case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
469 case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
470 case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
471 case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
472 case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
473 case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
474 case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
475 case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
476 case S2_storerbgp: // memb(gp+#u16:0)=Rt32
477 case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
478 case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
479 case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
480 case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
481 case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
482 case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
483 case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
484 case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
485 case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
486 Bits.set(Begin, Begin+8);
487 return true;
488
489 // Store low half
490 case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
491 case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
492 case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
493 case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
494 case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
495 case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
496 case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
497 case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
498 case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
499 case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
500 case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
501 case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
502 case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
503 case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
504 case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
505 case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
506 case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
507 case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
508 case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
509 case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
510 case S4_storerh_ap: // memh(Re32=#U6)=Rt32
511 case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
512 case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
513 case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
514 case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
515 case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
516 case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
517 case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
518 case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
519 case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
520 case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
521 case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
522 case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
523 case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
524 case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
525 case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
526 case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
527 case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
528 case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
529 case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
530 case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
531 case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
532 case S2_storerhgp: // memh(gp+#u16:1)=Rt32
533 case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
534 case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
535 case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
536 case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
537 case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
538 case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
539 case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
540 case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
541 case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
542 Bits.set(Begin, Begin+16);
543 return true;
544
545 // Store high half
546 case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
547 case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
548 case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
549 case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
550 case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
551 case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
552 case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
553 case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
554 case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
555 case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
556 case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
557 case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
558 case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
559 case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
560 case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
561 case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
562 case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
563 case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
564 case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
565 case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
566 case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
567 case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
568 case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
569 case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
570 case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
571 case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
572 Bits.set(Begin+16, Begin+32);
573 return true;
574 }
575
576 return false;
577}
578
579
580// For an instruction with opcode Opc, calculate the set of bits that it
581// uses in a register in operand OpN. This only calculates the set of used
582// bits for cases where it does not depend on any operands (as is the case
583// in shifts, for example). For concrete instructions from a program, the
584// operand may be a subregister of a larger register, while Bits would
585// correspond to the larger register in its entirety. Because of that,
586// the parameter Begin can be used to indicate which bit of Bits should be
587// considered the LSB of of the operand.
588bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
589 BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
590 using namespace Hexagon;
591
592 const MCInstrDesc &D = HII.get(Opc);
593 if (D.mayStore()) {
594 if (OpN == D.getNumOperands()-1)
595 return getUsedBitsInStore(Opc, Bits, Begin);
596 return false;
597 }
598
599 switch (Opc) {
600 // One register source. Used bits: R1[0-7].
601 case A2_sxtb:
602 case A2_zxtb:
603 case A4_cmpbeqi:
604 case A4_cmpbgti:
605 case A4_cmpbgtui:
606 if (OpN == 1) {
607 Bits.set(Begin, Begin+8);
608 return true;
609 }
610 break;
611
612 // One register source. Used bits: R1[0-15].
613 case A2_aslh:
614 case A2_sxth:
615 case A2_zxth:
616 case A4_cmpheqi:
617 case A4_cmphgti:
618 case A4_cmphgtui:
619 if (OpN == 1) {
620 Bits.set(Begin, Begin+16);
621 return true;
622 }
623 break;
624
625 // One register source. Used bits: R1[16-31].
626 case A2_asrh:
627 if (OpN == 1) {
628 Bits.set(Begin+16, Begin+32);
629 return true;
630 }
631 break;
632
633 // Two register sources. Used bits: R1[0-7], R2[0-7].
634 case A4_cmpbeq:
635 case A4_cmpbgt:
636 case A4_cmpbgtu:
637 if (OpN == 1) {
638 Bits.set(Begin, Begin+8);
639 return true;
640 }
641 break;
642
643 // Two register sources. Used bits: R1[0-15], R2[0-15].
644 case A4_cmpheq:
645 case A4_cmphgt:
646 case A4_cmphgtu:
647 case A2_addh_h16_ll:
648 case A2_addh_h16_sat_ll:
649 case A2_addh_l16_ll:
650 case A2_addh_l16_sat_ll:
651 case A2_combine_ll:
652 case A2_subh_h16_ll:
653 case A2_subh_h16_sat_ll:
654 case A2_subh_l16_ll:
655 case A2_subh_l16_sat_ll:
656 case M2_mpy_acc_ll_s0:
657 case M2_mpy_acc_ll_s1:
658 case M2_mpy_acc_sat_ll_s0:
659 case M2_mpy_acc_sat_ll_s1:
660 case M2_mpy_ll_s0:
661 case M2_mpy_ll_s1:
662 case M2_mpy_nac_ll_s0:
663 case M2_mpy_nac_ll_s1:
664 case M2_mpy_nac_sat_ll_s0:
665 case M2_mpy_nac_sat_ll_s1:
666 case M2_mpy_rnd_ll_s0:
667 case M2_mpy_rnd_ll_s1:
668 case M2_mpy_sat_ll_s0:
669 case M2_mpy_sat_ll_s1:
670 case M2_mpy_sat_rnd_ll_s0:
671 case M2_mpy_sat_rnd_ll_s1:
672 case M2_mpyd_acc_ll_s0:
673 case M2_mpyd_acc_ll_s1:
674 case M2_mpyd_ll_s0:
675 case M2_mpyd_ll_s1:
676 case M2_mpyd_nac_ll_s0:
677 case M2_mpyd_nac_ll_s1:
678 case M2_mpyd_rnd_ll_s0:
679 case M2_mpyd_rnd_ll_s1:
680 case M2_mpyu_acc_ll_s0:
681 case M2_mpyu_acc_ll_s1:
682 case M2_mpyu_ll_s0:
683 case M2_mpyu_ll_s1:
684 case M2_mpyu_nac_ll_s0:
685 case M2_mpyu_nac_ll_s1:
686 case M2_mpyud_acc_ll_s0:
687 case M2_mpyud_acc_ll_s1:
688 case M2_mpyud_ll_s0:
689 case M2_mpyud_ll_s1:
690 case M2_mpyud_nac_ll_s0:
691 case M2_mpyud_nac_ll_s1:
692 if (OpN == 1 || OpN == 2) {
693 Bits.set(Begin, Begin+16);
694 return true;
695 }
696 break;
697
698 // Two register sources. Used bits: R1[0-15], R2[16-31].
699 case A2_addh_h16_lh:
700 case A2_addh_h16_sat_lh:
701 case A2_combine_lh:
702 case A2_subh_h16_lh:
703 case A2_subh_h16_sat_lh:
704 case M2_mpy_acc_lh_s0:
705 case M2_mpy_acc_lh_s1:
706 case M2_mpy_acc_sat_lh_s0:
707 case M2_mpy_acc_sat_lh_s1:
708 case M2_mpy_lh_s0:
709 case M2_mpy_lh_s1:
710 case M2_mpy_nac_lh_s0:
711 case M2_mpy_nac_lh_s1:
712 case M2_mpy_nac_sat_lh_s0:
713 case M2_mpy_nac_sat_lh_s1:
714 case M2_mpy_rnd_lh_s0:
715 case M2_mpy_rnd_lh_s1:
716 case M2_mpy_sat_lh_s0:
717 case M2_mpy_sat_lh_s1:
718 case M2_mpy_sat_rnd_lh_s0:
719 case M2_mpy_sat_rnd_lh_s1:
720 case M2_mpyd_acc_lh_s0:
721 case M2_mpyd_acc_lh_s1:
722 case M2_mpyd_lh_s0:
723 case M2_mpyd_lh_s1:
724 case M2_mpyd_nac_lh_s0:
725 case M2_mpyd_nac_lh_s1:
726 case M2_mpyd_rnd_lh_s0:
727 case M2_mpyd_rnd_lh_s1:
728 case M2_mpyu_acc_lh_s0:
729 case M2_mpyu_acc_lh_s1:
730 case M2_mpyu_lh_s0:
731 case M2_mpyu_lh_s1:
732 case M2_mpyu_nac_lh_s0:
733 case M2_mpyu_nac_lh_s1:
734 case M2_mpyud_acc_lh_s0:
735 case M2_mpyud_acc_lh_s1:
736 case M2_mpyud_lh_s0:
737 case M2_mpyud_lh_s1:
738 case M2_mpyud_nac_lh_s0:
739 case M2_mpyud_nac_lh_s1:
740 // These four are actually LH.
741 case A2_addh_l16_hl:
742 case A2_addh_l16_sat_hl:
743 case A2_subh_l16_hl:
744 case A2_subh_l16_sat_hl:
745 if (OpN == 1) {
746 Bits.set(Begin, Begin+16);
747 return true;
748 }
749 if (OpN == 2) {
750 Bits.set(Begin+16, Begin+32);
751 return true;
752 }
753 break;
754
755 // Two register sources, used bits: R1[16-31], R2[0-15].
756 case A2_addh_h16_hl:
757 case A2_addh_h16_sat_hl:
758 case A2_combine_hl:
759 case A2_subh_h16_hl:
760 case A2_subh_h16_sat_hl:
761 case M2_mpy_acc_hl_s0:
762 case M2_mpy_acc_hl_s1:
763 case M2_mpy_acc_sat_hl_s0:
764 case M2_mpy_acc_sat_hl_s1:
765 case M2_mpy_hl_s0:
766 case M2_mpy_hl_s1:
767 case M2_mpy_nac_hl_s0:
768 case M2_mpy_nac_hl_s1:
769 case M2_mpy_nac_sat_hl_s0:
770 case M2_mpy_nac_sat_hl_s1:
771 case M2_mpy_rnd_hl_s0:
772 case M2_mpy_rnd_hl_s1:
773 case M2_mpy_sat_hl_s0:
774 case M2_mpy_sat_hl_s1:
775 case M2_mpy_sat_rnd_hl_s0:
776 case M2_mpy_sat_rnd_hl_s1:
777 case M2_mpyd_acc_hl_s0:
778 case M2_mpyd_acc_hl_s1:
779 case M2_mpyd_hl_s0:
780 case M2_mpyd_hl_s1:
781 case M2_mpyd_nac_hl_s0:
782 case M2_mpyd_nac_hl_s1:
783 case M2_mpyd_rnd_hl_s0:
784 case M2_mpyd_rnd_hl_s1:
785 case M2_mpyu_acc_hl_s0:
786 case M2_mpyu_acc_hl_s1:
787 case M2_mpyu_hl_s0:
788 case M2_mpyu_hl_s1:
789 case M2_mpyu_nac_hl_s0:
790 case M2_mpyu_nac_hl_s1:
791 case M2_mpyud_acc_hl_s0:
792 case M2_mpyud_acc_hl_s1:
793 case M2_mpyud_hl_s0:
794 case M2_mpyud_hl_s1:
795 case M2_mpyud_nac_hl_s0:
796 case M2_mpyud_nac_hl_s1:
797 if (OpN == 1) {
798 Bits.set(Begin+16, Begin+32);
799 return true;
800 }
801 if (OpN == 2) {
802 Bits.set(Begin, Begin+16);
803 return true;
804 }
805 break;
806
807 // Two register sources, used bits: R1[16-31], R2[16-31].
808 case A2_addh_h16_hh:
809 case A2_addh_h16_sat_hh:
810 case A2_combine_hh:
811 case A2_subh_h16_hh:
812 case A2_subh_h16_sat_hh:
813 case M2_mpy_acc_hh_s0:
814 case M2_mpy_acc_hh_s1:
815 case M2_mpy_acc_sat_hh_s0:
816 case M2_mpy_acc_sat_hh_s1:
817 case M2_mpy_hh_s0:
818 case M2_mpy_hh_s1:
819 case M2_mpy_nac_hh_s0:
820 case M2_mpy_nac_hh_s1:
821 case M2_mpy_nac_sat_hh_s0:
822 case M2_mpy_nac_sat_hh_s1:
823 case M2_mpy_rnd_hh_s0:
824 case M2_mpy_rnd_hh_s1:
825 case M2_mpy_sat_hh_s0:
826 case M2_mpy_sat_hh_s1:
827 case M2_mpy_sat_rnd_hh_s0:
828 case M2_mpy_sat_rnd_hh_s1:
829 case M2_mpyd_acc_hh_s0:
830 case M2_mpyd_acc_hh_s1:
831 case M2_mpyd_hh_s0:
832 case M2_mpyd_hh_s1:
833 case M2_mpyd_nac_hh_s0:
834 case M2_mpyd_nac_hh_s1:
835 case M2_mpyd_rnd_hh_s0:
836 case M2_mpyd_rnd_hh_s1:
837 case M2_mpyu_acc_hh_s0:
838 case M2_mpyu_acc_hh_s1:
839 case M2_mpyu_hh_s0:
840 case M2_mpyu_hh_s1:
841 case M2_mpyu_nac_hh_s0:
842 case M2_mpyu_nac_hh_s1:
843 case M2_mpyud_acc_hh_s0:
844 case M2_mpyud_acc_hh_s1:
845 case M2_mpyud_hh_s0:
846 case M2_mpyud_hh_s1:
847 case M2_mpyud_nac_hh_s0:
848 case M2_mpyud_nac_hh_s1:
849 if (OpN == 1 || OpN == 2) {
850 Bits.set(Begin+16, Begin+32);
851 return true;
852 }
853 break;
854 }
855
856 return false;
857}
858
859
860// Calculate the register class that matches Reg:Sub. For example, if
861// vreg1 is a double register, then vreg1:subreg_hireg would match "int"
862// register class.
863const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
864 const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
865 if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
866 return nullptr;
867 auto *RC = MRI.getRegClass(RR.Reg);
868 if (RR.Sub == 0)
869 return RC;
870
871 auto VerifySR = [] (unsigned Sub) -> void {
872 assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
873 };
874
875 switch (RC->getID()) {
876 case Hexagon::DoubleRegsRegClassID:
877 VerifySR(RR.Sub);
878 return &Hexagon::IntRegsRegClass;
Krzysztof Parzyszek5337a3e2016-01-14 21:45:43 +0000879 case Hexagon::VecDblRegsRegClassID:
880 VerifySR(RR.Sub);
881 return &Hexagon::VectorRegsRegClass;
882 case Hexagon::VecDblRegs128BRegClassID:
883 VerifySR(RR.Sub);
884 return &Hexagon::VectorRegs128BRegClass;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000885 }
886 return nullptr;
887}
888
889
890// Check if RD could be replaced with RS at any possible use of RD.
891// For example a predicate register cannot be replaced with a integer
892// register, but a 64-bit register with a subregister can be replaced
893// with a 32-bit register.
894bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
895 const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
896 if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
897 !TargetRegisterInfo::isVirtualRegister(RS.Reg))
898 return false;
899 // Return false if one (or both) classes are nullptr.
900 auto *DRC = getFinalVRegClass(RD, MRI);
901 if (!DRC)
902 return false;
903
904 return DRC == getFinalVRegClass(RS, MRI);
905}
906
907
Krzysztof Parzyszekd391d6f2016-10-06 16:18:04 +0000908bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
909 unsigned NewSub) {
910 if (!PreserveTiedOps)
911 return false;
912 return any_of(MRI.use_operands(Reg),
913 [NewSub] (const MachineOperand &Op) -> bool {
914 return Op.getSubReg() != NewSub && Op.isTied();
915 });
916}
917
Krzysztof Parzyszekced99412015-10-20 22:57:13 +0000918//
919// Dead code elimination
920//
921namespace {
922 class DeadCodeElimination {
923 public:
924 DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
925 : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
926 MDT(mdt), MRI(mf.getRegInfo()) {}
927
928 bool run() {
929 return runOnNode(MDT.getRootNode());
930 }
931
932 private:
933 bool isDead(unsigned R) const;
934 bool runOnNode(MachineDomTreeNode *N);
935
936 MachineFunction &MF;
937 const HexagonInstrInfo &HII;
938 MachineDominatorTree &MDT;
939 MachineRegisterInfo &MRI;
940 };
941}
942
943
944bool DeadCodeElimination::isDead(unsigned R) const {
945 for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
946 MachineInstr *UseI = I->getParent();
947 if (UseI->isDebugValue())
948 continue;
949 if (UseI->isPHI()) {
950 assert(!UseI->getOperand(0).getSubReg());
951 unsigned DR = UseI->getOperand(0).getReg();
952 if (DR == R)
953 continue;
954 }
955 return false;
956 }
957 return true;
958}
959
960
961bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
962 bool Changed = false;
963 typedef GraphTraits<MachineDomTreeNode*> GTN;
964 for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
965 Changed |= runOnNode(*I);
966
967 MachineBasicBlock *B = N->getBlock();
968 std::vector<MachineInstr*> Instrs;
969 for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
970 Instrs.push_back(&*I);
971
972 for (auto MI : Instrs) {
973 unsigned Opc = MI->getOpcode();
974 // Do not touch lifetime markers. This is why the target-independent DCE
975 // cannot be used.
976 if (Opc == TargetOpcode::LIFETIME_START ||
977 Opc == TargetOpcode::LIFETIME_END)
978 continue;
979 bool Store = false;
980 if (MI->isInlineAsm())
981 continue;
982 // Delete PHIs if possible.
983 if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
984 continue;
985
986 bool AllDead = true;
987 SmallVector<unsigned,2> Regs;
988 for (auto &Op : MI->operands()) {
989 if (!Op.isReg() || !Op.isDef())
990 continue;
991 unsigned R = Op.getReg();
992 if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
993 AllDead = false;
994 break;
995 }
996 Regs.push_back(R);
997 }
998 if (!AllDead)
999 continue;
1000
1001 B->erase(MI);
1002 for (unsigned i = 0, n = Regs.size(); i != n; ++i)
1003 MRI.markUsesInDebugValueAsUndef(Regs[i]);
1004 Changed = true;
1005 }
1006
1007 return Changed;
1008}
1009
1010
1011//
1012// Eliminate redundant instructions
1013//
1014// This transformation will identify instructions where the output register
1015// is the same as one of its input registers. This only works on instructions
1016// that define a single register (unlike post-increment loads, for example).
1017// The equality check is actually more detailed: the code calculates which
1018// bits of the output are used, and only compares these bits with the input
1019// registers.
1020// If the output matches an input, the instruction is replaced with COPY.
1021// The copies will be removed by another transformation.
1022namespace {
1023 class RedundantInstrElimination : public Transformation {
1024 public:
1025 RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
1026 MachineRegisterInfo &mri)
1027 : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1028 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1029 private:
1030 bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
1031 unsigned &LostB, unsigned &LostE);
1032 bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
1033 unsigned &LostB, unsigned &LostE);
1034 bool computeUsedBits(unsigned Reg, BitVector &Bits);
1035 bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
1036 uint16_t Begin);
1037 bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
1038
1039 const HexagonInstrInfo &HII;
1040 MachineRegisterInfo &MRI;
1041 BitTracker &BT;
1042 };
1043}
1044
1045
1046// Check if the instruction is a lossy shift left, where the input being
1047// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1048// of bit indices that are lost.
1049bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
1050 unsigned OpN, unsigned &LostB, unsigned &LostE) {
1051 using namespace Hexagon;
1052 unsigned Opc = MI.getOpcode();
1053 unsigned ImN, RegN, Width;
1054 switch (Opc) {
1055 case S2_asl_i_p:
1056 ImN = 2;
1057 RegN = 1;
1058 Width = 64;
1059 break;
1060 case S2_asl_i_p_acc:
1061 case S2_asl_i_p_and:
1062 case S2_asl_i_p_nac:
1063 case S2_asl_i_p_or:
1064 case S2_asl_i_p_xacc:
1065 ImN = 3;
1066 RegN = 2;
1067 Width = 64;
1068 break;
1069 case S2_asl_i_r:
1070 ImN = 2;
1071 RegN = 1;
1072 Width = 32;
1073 break;
1074 case S2_addasl_rrri:
1075 case S4_andi_asl_ri:
1076 case S4_ori_asl_ri:
1077 case S4_addi_asl_ri:
1078 case S4_subi_asl_ri:
1079 case S2_asl_i_r_acc:
1080 case S2_asl_i_r_and:
1081 case S2_asl_i_r_nac:
1082 case S2_asl_i_r_or:
1083 case S2_asl_i_r_sat:
1084 case S2_asl_i_r_xacc:
1085 ImN = 3;
1086 RegN = 2;
1087 Width = 32;
1088 break;
1089 default:
1090 return false;
1091 }
1092
1093 if (RegN != OpN)
1094 return false;
1095
1096 assert(MI.getOperand(ImN).isImm());
1097 unsigned S = MI.getOperand(ImN).getImm();
1098 if (S == 0)
1099 return false;
1100 LostB = Width-S;
1101 LostE = Width;
1102 return true;
1103}
1104
1105
1106// Check if the instruction is a lossy shift right, where the input being
1107// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1108// of bit indices that are lost.
1109bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
1110 unsigned OpN, unsigned &LostB, unsigned &LostE) {
1111 using namespace Hexagon;
1112 unsigned Opc = MI.getOpcode();
1113 unsigned ImN, RegN;
1114 switch (Opc) {
1115 case S2_asr_i_p:
1116 case S2_lsr_i_p:
1117 ImN = 2;
1118 RegN = 1;
1119 break;
1120 case S2_asr_i_p_acc:
1121 case S2_asr_i_p_and:
1122 case S2_asr_i_p_nac:
1123 case S2_asr_i_p_or:
1124 case S2_lsr_i_p_acc:
1125 case S2_lsr_i_p_and:
1126 case S2_lsr_i_p_nac:
1127 case S2_lsr_i_p_or:
1128 case S2_lsr_i_p_xacc:
1129 ImN = 3;
1130 RegN = 2;
1131 break;
1132 case S2_asr_i_r:
1133 case S2_lsr_i_r:
1134 ImN = 2;
1135 RegN = 1;
1136 break;
1137 case S4_andi_lsr_ri:
1138 case S4_ori_lsr_ri:
1139 case S4_addi_lsr_ri:
1140 case S4_subi_lsr_ri:
1141 case S2_asr_i_r_acc:
1142 case S2_asr_i_r_and:
1143 case S2_asr_i_r_nac:
1144 case S2_asr_i_r_or:
1145 case S2_lsr_i_r_acc:
1146 case S2_lsr_i_r_and:
1147 case S2_lsr_i_r_nac:
1148 case S2_lsr_i_r_or:
1149 case S2_lsr_i_r_xacc:
1150 ImN = 3;
1151 RegN = 2;
1152 break;
1153
1154 default:
1155 return false;
1156 }
1157
1158 if (RegN != OpN)
1159 return false;
1160
1161 assert(MI.getOperand(ImN).isImm());
1162 unsigned S = MI.getOperand(ImN).getImm();
1163 LostB = 0;
1164 LostE = S;
1165 return true;
1166}
1167
1168
1169// Calculate the bit vector that corresponds to the used bits of register Reg.
1170// The vector Bits has the same size, as the size of Reg in bits. If the cal-
1171// culation fails (i.e. the used bits are unknown), it returns false. Other-
1172// wise, it returns true and sets the corresponding bits in Bits.
1173bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
1174 BitVector Used(Bits.size());
1175 RegisterSet Visited;
1176 std::vector<unsigned> Pending;
1177 Pending.push_back(Reg);
1178
1179 for (unsigned i = 0; i < Pending.size(); ++i) {
1180 unsigned R = Pending[i];
1181 if (Visited.has(R))
1182 continue;
1183 Visited.insert(R);
1184 for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
1185 BitTracker::RegisterRef UR = *I;
1186 unsigned B, W;
1187 if (!HBS::getSubregMask(UR, B, W, MRI))
1188 return false;
1189 MachineInstr &UseI = *I->getParent();
1190 if (UseI.isPHI() || UseI.isCopy()) {
1191 unsigned DefR = UseI.getOperand(0).getReg();
1192 if (!TargetRegisterInfo::isVirtualRegister(DefR))
1193 return false;
1194 Pending.push_back(DefR);
1195 } else {
1196 if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
1197 return false;
1198 }
1199 }
1200 }
1201 Bits |= Used;
1202 return true;
1203}
1204
1205
1206// Calculate the bits used by instruction MI in a register in operand OpN.
1207// Return true/false if the calculation succeeds/fails. If is succeeds, set
1208// used bits in Bits. This function does not reset any bits in Bits, so
1209// subsequent calls over different instructions will result in the union
1210// of the used bits in all these instructions.
1211// The register in question may be used with a sub-register, whereas Bits
1212// holds the bits for the entire register. To keep track of that, the
1213// argument Begin indicates where in Bits is the lowest-significant bit
1214// of the register used in operand OpN. For example, in instruction:
1215// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
1216// the operand 1 is a 32-bit register, which happens to be a subregister
1217// of the 64-bit register vreg2, and that subregister starts at position 32.
1218// In this case Begin=32, since Bits[32] would be the lowest-significant bit
1219// of vreg2:subreg_hireg.
1220bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
1221 unsigned OpN, BitVector &Bits, uint16_t Begin) {
1222 unsigned Opc = MI.getOpcode();
1223 BitVector T(Bits.size());
1224 bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
1225 // Even if we don't have bits yet, we could still provide some information
1226 // if the instruction is a lossy shift: the lost bits will be marked as
1227 // not used.
1228 unsigned LB, LE;
1229 if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
1230 assert(MI.getOperand(OpN).isReg());
1231 BitTracker::RegisterRef RR = MI.getOperand(OpN);
1232 const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
1233 uint16_t Width = RC->getSize()*8;
1234
1235 if (!GotBits)
1236 T.set(Begin, Begin+Width);
1237 assert(LB <= LE && LB < Width && LE <= Width);
1238 T.reset(Begin+LB, Begin+LE);
1239 GotBits = true;
1240 }
1241 if (GotBits)
1242 Bits |= T;
1243 return GotBits;
1244}
1245
1246
1247// Calculates the used bits in RD ("defined register"), and checks if these
1248// bits in RS ("used register") and RD are identical.
1249bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
1250 BitTracker::RegisterRef RS) {
1251 const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
1252 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1253
1254 unsigned DB, DW;
1255 if (!HBS::getSubregMask(RD, DB, DW, MRI))
1256 return false;
1257 unsigned SB, SW;
1258 if (!HBS::getSubregMask(RS, SB, SW, MRI))
1259 return false;
1260 if (SW != DW)
1261 return false;
1262
1263 BitVector Used(DC.width());
1264 if (!computeUsedBits(RD.Reg, Used))
1265 return false;
1266
1267 for (unsigned i = 0; i != DW; ++i)
1268 if (Used[i+DB] && DC[DB+i] != SC[SB+i])
1269 return false;
1270 return true;
1271}
1272
1273
1274bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
1275 const RegisterSet&) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001276 if (!BT.reached(&B))
1277 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001278 bool Changed = false;
1279
1280 for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
1281 NextI = std::next(I);
1282 MachineInstr *MI = &*I;
1283
1284 if (MI->getOpcode() == TargetOpcode::COPY)
1285 continue;
1286 if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
1287 continue;
1288 unsigned NumD = MI->getDesc().getNumDefs();
1289 if (NumD != 1)
1290 continue;
1291
1292 BitTracker::RegisterRef RD = MI->getOperand(0);
1293 if (!BT.has(RD.Reg))
1294 continue;
1295 const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00001296 auto At = MI->isPHI() ? B.getFirstNonPHI()
1297 : MachineBasicBlock::iterator(MI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001298
1299 // Find a source operand that is equal to the result.
1300 for (auto &Op : MI->uses()) {
1301 if (!Op.isReg())
1302 continue;
1303 BitTracker::RegisterRef RS = Op;
1304 if (!BT.has(RS.Reg))
1305 continue;
1306 if (!HBS::isTransparentCopy(RD, RS, MRI))
1307 continue;
1308
1309 unsigned BN, BW;
1310 if (!HBS::getSubregMask(RS, BN, BW, MRI))
1311 continue;
1312
1313 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
1314 if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
1315 continue;
1316
1317 // If found, replace the instruction with a COPY.
Benjamin Kramer4ca41fd2016-06-12 17:30:47 +00001318 const DebugLoc &DL = MI->getDebugLoc();
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001319 const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
1320 unsigned NewR = MRI.createVirtualRegister(FRC);
Krzysztof Parzyszek6eba5b82016-07-26 19:08:45 +00001321 MachineInstr *CopyI =
1322 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1323 .addReg(RS.Reg, 0, RS.Sub);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001324 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
Krzysztof Parzyszek6eba5b82016-07-26 19:08:45 +00001325 // This pass can create copies between registers that don't have the
1326 // exact same values. Updating the tracker has to involve updating
1327 // all dependent cells. Example:
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001328 // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal
1329 //
1330 // vreg3 = copy vreg2 ; <- inserted
1331 // ... = vreg3 ; <- replaced from vreg2
1332 // Indirectly, we can create a "copy" between vreg1 and vreg2 even
1333 // though their exact values do not match.
Krzysztof Parzyszek6eba5b82016-07-26 19:08:45 +00001334 BT.visit(*CopyI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001335 Changed = true;
1336 break;
1337 }
1338 }
1339
1340 return Changed;
1341}
1342
1343
1344//
1345// Const generation
1346//
1347// Recognize instructions that produce constant values known at compile-time.
1348// Replace them with register definitions that load these constants directly.
1349namespace {
1350 class ConstGeneration : public Transformation {
1351 public:
1352 ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1353 MachineRegisterInfo &mri)
1354 : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1355 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001356 static bool isTfrConst(const MachineInstr &MI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001357 private:
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001358 unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
1359 MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
1360
1361 const HexagonInstrInfo &HII;
1362 MachineRegisterInfo &MRI;
1363 BitTracker &BT;
1364 };
1365}
1366
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001367bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
Duncan P. N. Exon Smith98226e32016-07-12 01:55:32 +00001368 unsigned Opc = MI.getOpcode();
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001369 switch (Opc) {
1370 case Hexagon::A2_combineii:
1371 case Hexagon::A4_combineii:
1372 case Hexagon::A2_tfrsi:
1373 case Hexagon::A2_tfrpi:
Krzysztof Parzyszek1d01a792016-08-16 18:08:40 +00001374 case Hexagon::PS_true:
1375 case Hexagon::PS_false:
Krzysztof Parzyszeka3386502016-08-10 16:46:36 +00001376 case Hexagon::CONST32:
1377 case Hexagon::CONST64:
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001378 return true;
1379 }
1380 return false;
1381}
1382
1383
1384// Generate a transfer-immediate instruction that is appropriate for the
1385// register class and the actual value being transferred.
1386unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
1387 MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
1388 unsigned Reg = MRI.createVirtualRegister(RC);
1389 if (RC == &Hexagon::IntRegsRegClass) {
1390 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
1391 .addImm(int32_t(C));
1392 return Reg;
1393 }
1394
1395 if (RC == &Hexagon::DoubleRegsRegClass) {
1396 if (isInt<8>(C)) {
1397 BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
1398 .addImm(C);
1399 return Reg;
1400 }
1401
1402 unsigned Lo = Lo_32(C), Hi = Hi_32(C);
1403 if (isInt<8>(Lo) || isInt<8>(Hi)) {
1404 unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
1405 : Hexagon::A4_combineii;
1406 BuildMI(B, At, DL, HII.get(Opc), Reg)
1407 .addImm(int32_t(Hi))
1408 .addImm(int32_t(Lo));
1409 return Reg;
1410 }
1411
Krzysztof Parzyszeka3386502016-08-10 16:46:36 +00001412 BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001413 .addImm(C);
1414 return Reg;
1415 }
1416
1417 if (RC == &Hexagon::PredRegsRegClass) {
1418 unsigned Opc;
1419 if (C == 0)
Krzysztof Parzyszek1d01a792016-08-16 18:08:40 +00001420 Opc = Hexagon::PS_false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001421 else if ((C & 0xFF) == 0xFF)
Krzysztof Parzyszek1d01a792016-08-16 18:08:40 +00001422 Opc = Hexagon::PS_true;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001423 else
1424 return 0;
1425 BuildMI(B, At, DL, HII.get(Opc), Reg);
1426 return Reg;
1427 }
1428
1429 return 0;
1430}
1431
1432
1433bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001434 if (!BT.reached(&B))
1435 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001436 bool Changed = false;
1437 RegisterSet Defs;
1438
1439 for (auto I = B.begin(), E = B.end(); I != E; ++I) {
Duncan P. N. Exon Smith98226e32016-07-12 01:55:32 +00001440 if (isTfrConst(*I))
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001441 continue;
1442 Defs.clear();
1443 HBS::getInstrDefs(*I, Defs);
1444 if (Defs.count() != 1)
1445 continue;
1446 unsigned DR = Defs.find_first();
1447 if (!TargetRegisterInfo::isVirtualRegister(DR))
1448 continue;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001449 uint64_t U;
1450 const BitTracker::RegisterCell &DRC = BT.lookup(DR);
1451 if (HBS::getConst(DRC, 0, DRC.width(), U)) {
1452 int64_t C = U;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001453 DebugLoc DL = I->getDebugLoc();
1454 auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1455 unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
1456 if (ImmReg) {
1457 HBS::replaceReg(DR, ImmReg, MRI);
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001458 BT.put(ImmReg, DRC);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001459 Changed = true;
1460 }
1461 }
1462 }
1463 return Changed;
1464}
1465
1466
1467//
1468// Copy generation
1469//
1470// Identify pairs of available registers which hold identical values.
1471// In such cases, only one of them needs to be calculated, the other one
1472// will be defined as a copy of the first.
1473//
1474// Copy propagation
1475//
1476// Eliminate register copies RD = RS, by replacing the uses of RD with
1477// with uses of RS.
1478namespace {
1479 class CopyGeneration : public Transformation {
1480 public:
1481 CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
1482 MachineRegisterInfo &mri)
1483 : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
1484 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1485 private:
1486 bool findMatch(const BitTracker::RegisterRef &Inp,
1487 BitTracker::RegisterRef &Out, const RegisterSet &AVs);
1488
1489 const HexagonInstrInfo &HII;
1490 MachineRegisterInfo &MRI;
1491 BitTracker &BT;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001492 RegisterSet Forbidden;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001493 };
1494
1495 class CopyPropagation : public Transformation {
1496 public:
1497 CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
1498 : Transformation(false), MRI(mri) {}
1499 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001500 static bool isCopyReg(unsigned Opc, bool NoConv);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001501 private:
1502 bool propagateRegCopy(MachineInstr &MI);
1503
1504 MachineRegisterInfo &MRI;
1505 };
1506
1507}
1508
1509
1510/// Check if there is a register in AVs that is identical to Inp. If so,
1511/// set Out to the found register. The output may be a pair Reg:Sub.
1512bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
1513 BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
1514 if (!BT.has(Inp.Reg))
1515 return false;
1516 const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001517 auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001518 unsigned B, W;
1519 if (!HBS::getSubregMask(Inp, B, W, MRI))
1520 return false;
1521
1522 for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001523 if (!BT.has(R) || Forbidden[R])
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001524 continue;
1525 const BitTracker::RegisterCell &RC = BT.lookup(R);
1526 unsigned RW = RC.width();
1527 if (W == RW) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001528 if (FRC != MRI.getRegClass(R))
1529 continue;
1530 if (!HBS::isTransparentCopy(R, Inp, MRI))
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001531 continue;
1532 if (!HBS::isEqual(InpRC, B, RC, 0, W))
1533 continue;
1534 Out.Reg = R;
1535 Out.Sub = 0;
1536 return true;
1537 }
1538 // Check if there is a super-register, whose part (with a subregister)
1539 // is equal to the input.
1540 // Only do double registers for now.
1541 if (W*2 != RW)
1542 continue;
1543 if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
1544 continue;
1545
1546 if (HBS::isEqual(InpRC, B, RC, 0, W))
1547 Out.Sub = Hexagon::subreg_loreg;
1548 else if (HBS::isEqual(InpRC, B, RC, W, W))
1549 Out.Sub = Hexagon::subreg_hireg;
1550 else
1551 continue;
1552 Out.Reg = R;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001553 if (HBS::isTransparentCopy(Out, Inp, MRI))
1554 return true;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001555 }
1556 return false;
1557}
1558
1559
1560bool CopyGeneration::processBlock(MachineBasicBlock &B,
1561 const RegisterSet &AVs) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001562 if (!BT.reached(&B))
1563 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001564 RegisterSet AVB(AVs);
1565 bool Changed = false;
1566 RegisterSet Defs;
1567
1568 for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
1569 ++I, AVB.insert(Defs)) {
1570 NextI = std::next(I);
1571 Defs.clear();
1572 HBS::getInstrDefs(*I, Defs);
1573
1574 unsigned Opc = I->getOpcode();
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001575 if (CopyPropagation::isCopyReg(Opc, false) ||
1576 ConstGeneration::isTfrConst(*I))
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001577 continue;
1578
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001579 DebugLoc DL = I->getDebugLoc();
1580 auto At = I->isPHI() ? B.getFirstNonPHI() : I;
1581
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001582 for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
1583 BitTracker::RegisterRef MR;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001584 auto *FRC = HBS::getFinalVRegClass(R, MRI);
1585
1586 if (findMatch(R, MR, AVB)) {
1587 unsigned NewR = MRI.createVirtualRegister(FRC);
1588 BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
1589 .addReg(MR.Reg, 0, MR.Sub);
1590 BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
1591 HBS::replaceReg(R, NewR, MRI);
1592 Forbidden.insert(R);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001593 continue;
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001594 }
1595
Krzysztof Parzyszek824d3472016-08-02 21:49:20 +00001596 if (FRC == &Hexagon::DoubleRegsRegClass ||
1597 FRC == &Hexagon::VecDblRegsRegClass ||
1598 FRC == &Hexagon::VecDblRegs128BRegClass) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00001599 // Try to generate REG_SEQUENCE.
1600 BitTracker::RegisterRef TL = { R, Hexagon::subreg_loreg };
1601 BitTracker::RegisterRef TH = { R, Hexagon::subreg_hireg };
1602 BitTracker::RegisterRef ML, MH;
1603 if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
1604 auto *FRC = HBS::getFinalVRegClass(R, MRI);
1605 unsigned NewR = MRI.createVirtualRegister(FRC);
1606 BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
1607 .addReg(ML.Reg, 0, ML.Sub)
1608 .addImm(Hexagon::subreg_loreg)
1609 .addReg(MH.Reg, 0, MH.Sub)
1610 .addImm(Hexagon::subreg_hireg);
1611 BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
1612 HBS::replaceReg(R, NewR, MRI);
1613 Forbidden.insert(R);
1614 }
1615 }
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001616 }
1617 }
1618
1619 return Changed;
1620}
1621
1622
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001623bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001624 switch (Opc) {
1625 case TargetOpcode::COPY:
1626 case TargetOpcode::REG_SEQUENCE:
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001627 case Hexagon::A4_combineir:
1628 case Hexagon::A4_combineri:
1629 return true;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001630 case Hexagon::A2_tfr:
1631 case Hexagon::A2_tfrp:
1632 case Hexagon::A2_combinew:
Krzysztof Parzyszek824d3472016-08-02 21:49:20 +00001633 case Hexagon::V6_vcombine:
1634 case Hexagon::V6_vcombine_128B:
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001635 return NoConv;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001636 default:
1637 break;
1638 }
1639 return false;
1640}
1641
1642
1643bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
1644 bool Changed = false;
1645 unsigned Opc = MI.getOpcode();
1646 BitTracker::RegisterRef RD = MI.getOperand(0);
1647 assert(MI.getOperand(0).getSubReg() == 0);
1648
1649 switch (Opc) {
1650 case TargetOpcode::COPY:
1651 case Hexagon::A2_tfr:
1652 case Hexagon::A2_tfrp: {
1653 BitTracker::RegisterRef RS = MI.getOperand(1);
1654 if (!HBS::isTransparentCopy(RD, RS, MRI))
1655 break;
1656 if (RS.Sub != 0)
1657 Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
1658 else
1659 Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
1660 break;
1661 }
1662 case TargetOpcode::REG_SEQUENCE: {
1663 BitTracker::RegisterRef SL, SH;
1664 if (HBS::parseRegSequence(MI, SL, SH)) {
1665 Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
1666 SL.Reg, SL.Sub, MRI);
1667 Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
1668 SH.Reg, SH.Sub, MRI);
1669 }
1670 break;
1671 }
Krzysztof Parzyszek824d3472016-08-02 21:49:20 +00001672 case Hexagon::A2_combinew:
1673 case Hexagon::V6_vcombine:
1674 case Hexagon::V6_vcombine_128B: {
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001675 BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
1676 Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
1677 RL.Reg, RL.Sub, MRI);
1678 Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
1679 RH.Reg, RH.Sub, MRI);
1680 break;
1681 }
1682 case Hexagon::A4_combineir:
1683 case Hexagon::A4_combineri: {
1684 unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
1685 unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
1686 : Hexagon::subreg_hireg;
1687 BitTracker::RegisterRef RS = MI.getOperand(SrcX);
1688 Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
1689 break;
1690 }
1691 }
1692 return Changed;
1693}
1694
1695
1696bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
1697 std::vector<MachineInstr*> Instrs;
1698 for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
1699 Instrs.push_back(&*I);
1700
1701 bool Changed = false;
1702 for (auto I : Instrs) {
1703 unsigned Opc = I->getOpcode();
Krzysztof Parzyszek23ee12e2016-08-03 18:35:48 +00001704 if (!CopyPropagation::isCopyReg(Opc, true))
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001705 continue;
1706 Changed |= propagateRegCopy(*I);
1707 }
1708
1709 return Changed;
1710}
1711
1712
1713//
1714// Bit simplification
1715//
1716// Recognize patterns that can be simplified and replace them with the
1717// simpler forms.
1718// This is by no means complete
1719namespace {
1720 class BitSimplification : public Transformation {
1721 public:
1722 BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001723 const HexagonRegisterInfo &hri, MachineRegisterInfo &mri,
1724 MachineFunction &mf)
1725 : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {}
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001726 bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
1727 private:
1728 struct RegHalf : public BitTracker::RegisterRef {
1729 bool Low; // Low/High halfword.
1730 };
1731
1732 bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
1733 unsigned B, RegHalf &RH);
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001734 bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001735
1736 bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
1737 BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
1738 unsigned getCombineOpcode(bool HLow, bool LLow);
1739
1740 bool genStoreUpperHalf(MachineInstr *MI);
1741 bool genStoreImmediate(MachineInstr *MI);
1742 bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
1743 const BitTracker::RegisterCell &RC);
1744 bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1745 const BitTracker::RegisterCell &RC);
1746 bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
1747 const BitTracker::RegisterCell &RC);
1748 bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
1749 const BitTracker::RegisterCell &RC);
1750 bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
1751 const BitTracker::RegisterCell &RC);
1752
1753 const HexagonInstrInfo &HII;
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001754 const HexagonRegisterInfo &HRI;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001755 MachineRegisterInfo &MRI;
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001756 MachineFunction &MF;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001757 BitTracker &BT;
1758 };
1759}
1760
1761
1762// Check if the bits [B..B+16) in register cell RC form a valid halfword,
1763// i.e. [0..16), [16..32), etc. of some register. If so, return true and
1764// set the information about the found register in RH.
1765bool BitSimplification::matchHalf(unsigned SelfR,
1766 const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
1767 // XXX This could be searching in the set of available registers, in case
1768 // the match is not exact.
1769
1770 // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1771 // register and all the bits B..B+15 match between RC and the register.
1772 // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1773 // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1774 bool Low = false;
1775 unsigned I = B;
1776 while (I < B+16 && RC[I].num())
1777 I++;
1778 if (I == B+16)
1779 return false;
1780
1781 unsigned Reg = RC[I].RefI.Reg;
1782 unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
1783 if (P < I-B)
1784 return false;
1785 unsigned Pos = P - (I-B);
1786
1787 if (Reg == 0 || Reg == SelfR) // Don't match "self".
1788 return false;
1789 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1790 return false;
1791 if (!BT.has(Reg))
1792 return false;
1793
1794 const BitTracker::RegisterCell &SC = BT.lookup(Reg);
1795 if (Pos+16 > SC.width())
1796 return false;
1797
1798 for (unsigned i = 0; i < 16; ++i) {
1799 const BitTracker::BitValue &RV = RC[i+B];
1800 if (RV.Type == BitTracker::BitValue::Ref) {
1801 if (RV.RefI.Reg != Reg)
1802 return false;
1803 if (RV.RefI.Pos != i+Pos)
1804 return false;
1805 continue;
1806 }
1807 if (RC[i+B] != SC[i+Pos])
1808 return false;
1809 }
1810
1811 unsigned Sub = 0;
1812 switch (Pos) {
1813 case 0:
1814 Sub = Hexagon::subreg_loreg;
1815 Low = true;
1816 break;
1817 case 16:
1818 Sub = Hexagon::subreg_loreg;
1819 Low = false;
1820 break;
1821 case 32:
1822 Sub = Hexagon::subreg_hireg;
1823 Low = true;
1824 break;
1825 case 48:
1826 Sub = Hexagon::subreg_hireg;
1827 Low = false;
1828 break;
1829 default:
1830 return false;
1831 }
1832
1833 RH.Reg = Reg;
1834 RH.Sub = Sub;
1835 RH.Low = Low;
1836 // If the subregister is not valid with the register, set it to 0.
1837 if (!HBS::getFinalVRegClass(RH, MRI))
1838 RH.Sub = 0;
1839
1840 return true;
1841}
1842
1843
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001844bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
1845 unsigned OpNum) {
1846 auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
1847 auto *RRC = HBS::getFinalVRegClass(R, MRI);
1848 return OpRC->hasSubClassEq(RRC);
1849}
1850
1851
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001852// Check if RC matches the pattern of a S2_packhl. If so, return true and
1853// set the inputs Rs and Rt.
1854bool BitSimplification::matchPackhl(unsigned SelfR,
1855 const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
1856 BitTracker::RegisterRef &Rt) {
1857 RegHalf L1, H1, L2, H2;
1858
1859 if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
1860 return false;
1861 if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
1862 return false;
1863
1864 // Rs = H1.L1, Rt = H2.L2
1865 if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
1866 return false;
1867 if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
1868 return false;
1869
1870 Rs = H1;
1871 Rt = H2;
1872 return true;
1873}
1874
1875
1876unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
1877 return HLow ? LLow ? Hexagon::A2_combine_ll
1878 : Hexagon::A2_combine_lh
1879 : LLow ? Hexagon::A2_combine_hl
1880 : Hexagon::A2_combine_hh;
1881}
1882
1883
1884// If MI stores the upper halfword of a register (potentially obtained via
1885// shifts or extracts), replace it with a storerf instruction. This could
1886// cause the "extraction" code to become dead.
1887bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
1888 unsigned Opc = MI->getOpcode();
1889 if (Opc != Hexagon::S2_storerh_io)
1890 return false;
1891
1892 MachineOperand &ValOp = MI->getOperand(2);
1893 BitTracker::RegisterRef RS = ValOp;
1894 if (!BT.has(RS.Reg))
1895 return false;
1896 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1897 RegHalf H;
1898 if (!matchHalf(0, RC, 0, H))
1899 return false;
1900 if (H.Low)
1901 return false;
1902 MI->setDesc(HII.get(Hexagon::S2_storerf_io));
1903 ValOp.setReg(H.Reg);
1904 ValOp.setSubReg(H.Sub);
1905 return true;
1906}
1907
1908
1909// If MI stores a value known at compile-time, and the value is within a range
1910// that avoids using constant-extenders, replace it with a store-immediate.
1911bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
1912 unsigned Opc = MI->getOpcode();
1913 unsigned Align = 0;
1914 switch (Opc) {
1915 case Hexagon::S2_storeri_io:
1916 Align++;
1917 case Hexagon::S2_storerh_io:
1918 Align++;
1919 case Hexagon::S2_storerb_io:
1920 break;
1921 default:
1922 return false;
1923 }
1924
1925 // Avoid stores to frame-indices (due to an unknown offset).
1926 if (!MI->getOperand(0).isReg())
1927 return false;
1928 MachineOperand &OffOp = MI->getOperand(1);
1929 if (!OffOp.isImm())
1930 return false;
1931
1932 int64_t Off = OffOp.getImm();
1933 // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
1934 if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
1935 return false;
1936 // Source register:
1937 BitTracker::RegisterRef RS = MI->getOperand(2);
1938 if (!BT.has(RS.Reg))
1939 return false;
1940 const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
1941 uint64_t U;
1942 if (!HBS::getConst(RC, 0, RC.width(), U))
1943 return false;
1944
1945 // Only consider 8-bit values to avoid constant-extenders.
1946 int V;
1947 switch (Opc) {
1948 case Hexagon::S2_storerb_io:
1949 V = int8_t(U);
1950 break;
1951 case Hexagon::S2_storerh_io:
1952 V = int16_t(U);
1953 break;
1954 case Hexagon::S2_storeri_io:
1955 V = int32_t(U);
1956 break;
1957 }
1958 if (!isInt<8>(V))
1959 return false;
1960
1961 MI->RemoveOperand(2);
1962 switch (Opc) {
1963 case Hexagon::S2_storerb_io:
1964 MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
1965 break;
1966 case Hexagon::S2_storerh_io:
1967 MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
1968 break;
1969 case Hexagon::S2_storeri_io:
1970 MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
1971 break;
1972 }
1973 MI->addOperand(MachineOperand::CreateImm(V));
1974 return true;
1975}
1976
1977
1978// If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
1979// last instruction in a sequence that results in something equivalent to
1980// the pack-halfwords. The intent is to cause the entire sequence to become
1981// dead.
1982bool BitSimplification::genPackhl(MachineInstr *MI,
1983 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
1984 unsigned Opc = MI->getOpcode();
1985 if (Opc == Hexagon::S2_packhl)
1986 return false;
1987 BitTracker::RegisterRef Rs, Rt;
1988 if (!matchPackhl(RD.Reg, RC, Rs, Rt))
1989 return false;
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00001990 if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
1991 !validateReg(Rt, Hexagon::S2_packhl, 2))
1992 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00001993
1994 MachineBasicBlock &B = *MI->getParent();
1995 unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
1996 DebugLoc DL = MI->getDebugLoc();
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00001997 auto At = MI->isPHI() ? B.getFirstNonPHI()
1998 : MachineBasicBlock::iterator(MI);
1999 BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002000 .addReg(Rs.Reg, 0, Rs.Sub)
2001 .addReg(Rt.Reg, 0, Rt.Sub);
2002 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2003 BT.put(BitTracker::RegisterRef(NewR), RC);
2004 return true;
2005}
2006
2007
2008// If MI produces halfword of the input in the low half of the output,
2009// replace it with zero-extend or extractu.
2010bool BitSimplification::genExtractHalf(MachineInstr *MI,
2011 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2012 RegHalf L;
2013 // Check for halfword in low 16 bits, zeros elsewhere.
2014 if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
2015 return false;
2016
2017 unsigned Opc = MI->getOpcode();
2018 MachineBasicBlock &B = *MI->getParent();
2019 DebugLoc DL = MI->getDebugLoc();
2020
2021 // Prefer zxth, since zxth can go in any slot, while extractu only in
2022 // slots 2 and 3.
2023 unsigned NewR = 0;
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002024 auto At = MI->isPHI() ? B.getFirstNonPHI()
2025 : MachineBasicBlock::iterator(MI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002026 if (L.Low && Opc != Hexagon::A2_zxth) {
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00002027 if (validateReg(L, Hexagon::A2_zxth, 1)) {
2028 NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2029 BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
2030 .addReg(L.Reg, 0, L.Sub);
2031 }
Krzysztof Parzyszek0d112122016-01-14 21:59:22 +00002032 } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00002033 if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
2034 NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2035 BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
2036 .addReg(L.Reg, 0, L.Sub)
2037 .addImm(16);
2038 }
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002039 }
2040 if (NewR == 0)
2041 return false;
2042 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2043 BT.put(BitTracker::RegisterRef(NewR), RC);
2044 return true;
2045}
2046
2047
2048// If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
2049// combine.
2050bool BitSimplification::genCombineHalf(MachineInstr *MI,
2051 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2052 RegHalf L, H;
2053 // Check for combine h/l
2054 if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
2055 return false;
2056 // Do nothing if this is just a reg copy.
2057 if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
2058 return false;
2059
2060 unsigned Opc = MI->getOpcode();
2061 unsigned COpc = getCombineOpcode(H.Low, L.Low);
2062 if (COpc == Opc)
2063 return false;
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00002064 if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
2065 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002066
2067 MachineBasicBlock &B = *MI->getParent();
2068 DebugLoc DL = MI->getDebugLoc();
2069 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002070 auto At = MI->isPHI() ? B.getFirstNonPHI()
2071 : MachineBasicBlock::iterator(MI);
2072 BuildMI(B, At, DL, HII.get(COpc), NewR)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002073 .addReg(H.Reg, 0, H.Sub)
2074 .addReg(L.Reg, 0, L.Sub);
2075 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2076 BT.put(BitTracker::RegisterRef(NewR), RC);
2077 return true;
2078}
2079
2080
2081// If MI resets high bits of a register and keeps the lower ones, replace it
2082// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2083bool BitSimplification::genExtractLow(MachineInstr *MI,
2084 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2085 unsigned Opc = MI->getOpcode();
2086 switch (Opc) {
2087 case Hexagon::A2_zxtb:
2088 case Hexagon::A2_zxth:
2089 case Hexagon::S2_extractu:
2090 return false;
2091 }
2092 if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
2093 int32_t Imm = MI->getOperand(2).getImm();
2094 if (isInt<10>(Imm))
2095 return false;
2096 }
2097
2098 if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
2099 return false;
2100 unsigned W = RC.width();
2101 while (W > 0 && RC[W-1].is(0))
2102 W--;
2103 if (W == 0 || W == RC.width())
2104 return false;
2105 unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
2106 : (W == 16) ? Hexagon::A2_zxth
2107 : (W < 10) ? Hexagon::A2_andir
2108 : Hexagon::S2_extractu;
2109 MachineBasicBlock &B = *MI->getParent();
2110 DebugLoc DL = MI->getDebugLoc();
2111
2112 for (auto &Op : MI->uses()) {
2113 if (!Op.isReg())
2114 continue;
2115 BitTracker::RegisterRef RS = Op;
2116 if (!BT.has(RS.Reg))
2117 continue;
2118 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2119 unsigned BN, BW;
2120 if (!HBS::getSubregMask(RS, BN, BW, MRI))
2121 continue;
2122 if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
2123 continue;
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00002124 if (!validateReg(RS, NewOpc, 1))
2125 continue;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002126
2127 unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002128 auto At = MI->isPHI() ? B.getFirstNonPHI()
2129 : MachineBasicBlock::iterator(MI);
2130 auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002131 .addReg(RS.Reg, 0, RS.Sub);
2132 if (NewOpc == Hexagon::A2_andir)
2133 MIB.addImm((1 << W) - 1);
2134 else if (NewOpc == Hexagon::S2_extractu)
2135 MIB.addImm(W).addImm(0);
2136 HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
2137 BT.put(BitTracker::RegisterRef(NewR), RC);
2138 return true;
2139 }
2140 return false;
2141}
2142
2143
2144// Check for tstbit simplification opportunity, where the bit being checked
2145// can be tracked back to another register. For example:
2146// vreg2 = S2_lsr_i_r vreg1, 5
2147// vreg3 = S2_tstbit_i vreg2, 0
2148// =>
2149// vreg3 = S2_tstbit_i vreg1, 5
2150bool BitSimplification::simplifyTstbit(MachineInstr *MI,
2151 BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
2152 unsigned Opc = MI->getOpcode();
2153 if (Opc != Hexagon::S2_tstbit_i)
2154 return false;
2155
2156 unsigned BN = MI->getOperand(2).getImm();
2157 BitTracker::RegisterRef RS = MI->getOperand(1);
2158 unsigned F, W;
2159 DebugLoc DL = MI->getDebugLoc();
2160 if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
2161 return false;
2162 MachineBasicBlock &B = *MI->getParent();
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002163 auto At = MI->isPHI() ? B.getFirstNonPHI()
2164 : MachineBasicBlock::iterator(MI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002165
2166 const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
2167 const BitTracker::BitValue &V = SC[F+BN];
2168 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
2169 const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
2170 // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2171 // a double register, need to use a subregister and adjust bit
2172 // number.
2173 unsigned P = UINT_MAX;
2174 BitTracker::RegisterRef RR(V.RefI.Reg, 0);
2175 if (TC == &Hexagon::DoubleRegsRegClass) {
2176 P = V.RefI.Pos;
2177 RR.Sub = Hexagon::subreg_loreg;
2178 if (P >= 32) {
2179 P -= 32;
2180 RR.Sub = Hexagon::subreg_hireg;
2181 }
2182 } else if (TC == &Hexagon::IntRegsRegClass) {
2183 P = V.RefI.Pos;
2184 }
2185 if (P != UINT_MAX) {
2186 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002187 BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002188 .addReg(RR.Reg, 0, RR.Sub)
2189 .addImm(P);
2190 HBS::replaceReg(RD.Reg, NewR, MRI);
2191 BT.put(NewR, RC);
2192 return true;
2193 }
2194 } else if (V.is(0) || V.is(1)) {
2195 unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
Krzysztof Parzyszek1d01a792016-08-16 18:08:40 +00002196 unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
Krzysztof Parzyszeka3c5d442016-01-13 15:48:18 +00002197 BuildMI(B, At, DL, HII.get(NewOpc), NewR);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002198 HBS::replaceReg(RD.Reg, NewR, MRI);
2199 return true;
2200 }
2201
2202 return false;
2203}
2204
2205
2206bool BitSimplification::processBlock(MachineBasicBlock &B,
2207 const RegisterSet &AVs) {
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00002208 if (!BT.reached(&B))
2209 return false;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002210 bool Changed = false;
2211 RegisterSet AVB = AVs;
2212 RegisterSet Defs;
2213
2214 for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
2215 MachineInstr *MI = &*I;
2216 Defs.clear();
2217 HBS::getInstrDefs(*MI, Defs);
2218
2219 unsigned Opc = MI->getOpcode();
2220 if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
2221 continue;
2222
2223 if (MI->mayStore()) {
2224 bool T = genStoreUpperHalf(MI);
2225 T = T || genStoreImmediate(MI);
2226 Changed |= T;
2227 continue;
2228 }
2229
2230 if (Defs.count() != 1)
2231 continue;
2232 const MachineOperand &Op0 = MI->getOperand(0);
2233 if (!Op0.isReg() || !Op0.isDef())
2234 continue;
2235 BitTracker::RegisterRef RD = Op0;
2236 if (!BT.has(RD.Reg))
2237 continue;
2238 const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
2239 const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
2240
2241 if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
2242 bool T = genPackhl(MI, RD, RC);
2243 Changed |= T;
2244 continue;
2245 }
2246
2247 if (FRC->getID() == Hexagon::IntRegsRegClassID) {
2248 bool T = genExtractHalf(MI, RD, RC);
2249 T = T || genCombineHalf(MI, RD, RC);
2250 T = T || genExtractLow(MI, RD, RC);
2251 Changed |= T;
2252 continue;
2253 }
2254
2255 if (FRC->getID() == Hexagon::PredRegsRegClassID) {
2256 bool T = simplifyTstbit(MI, RD, RC);
2257 Changed |= T;
2258 continue;
2259 }
2260 }
2261 return Changed;
2262}
2263
2264
2265bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
Andrew Kaylor5b444a22016-04-26 19:46:28 +00002266 if (skipFunction(*MF.getFunction()))
2267 return false;
2268
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002269 auto &HST = MF.getSubtarget<HexagonSubtarget>();
2270 auto &HRI = *HST.getRegisterInfo();
2271 auto &HII = *HST.getInstrInfo();
2272
2273 MDT = &getAnalysis<MachineDominatorTree>();
2274 MachineRegisterInfo &MRI = MF.getRegInfo();
2275 bool Changed;
2276
2277 Changed = DeadCodeElimination(MF, *MDT).run();
2278
2279 const HexagonEvaluator HE(HRI, MRI, HII, MF);
2280 BitTracker BT(HE, MF);
2281 DEBUG(BT.trace(true));
2282 BT.run();
2283
2284 MachineBasicBlock &Entry = MF.front();
2285
2286 RegisterSet AIG; // Available registers for IG.
2287 ConstGeneration ImmG(BT, HII, MRI);
2288 Changed |= visitBlock(Entry, ImmG, AIG);
2289
2290 RegisterSet ARE; // Available registers for RIE.
2291 RedundantInstrElimination RIE(BT, HII, MRI);
Krzysztof Parzyszek1adca302016-07-26 18:30:11 +00002292 bool Ried = visitBlock(Entry, RIE, ARE);
2293 if (Ried) {
2294 Changed = true;
2295 BT.run();
2296 }
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002297
2298 RegisterSet ACG; // Available registers for CG.
2299 CopyGeneration CopyG(BT, HII, MRI);
2300 Changed |= visitBlock(Entry, CopyG, ACG);
2301
2302 RegisterSet ACP; // Available registers for CP.
2303 CopyPropagation CopyP(HRI, MRI);
2304 Changed |= visitBlock(Entry, CopyP, ACP);
2305
2306 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2307
2308 BT.run();
2309 RegisterSet ABS; // Available registers for BS.
Krzysztof Parzyszek04c07962016-08-04 17:56:19 +00002310 BitSimplification BitS(BT, HII, HRI, MRI, MF);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002311 Changed |= visitBlock(Entry, BitS, ABS);
2312
2313 Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
2314
2315 if (Changed) {
2316 for (auto &B : MF)
2317 for (auto &I : B)
2318 I.clearKillInfo();
2319 DeadCodeElimination(MF, *MDT).run();
2320 }
2321 return Changed;
2322}
2323
2324
2325// Recognize loops where the code at the end of the loop matches the code
2326// before the entry of the loop, and the matching code is such that is can
2327// be simplified. This pass relies on the bit simplification above and only
2328// prepares code in a way that can be handled by the bit simplifcation.
2329//
2330// This is the motivating testcase (and explanation):
2331//
2332// {
2333// loop0(.LBB0_2, r1) // %for.body.preheader
2334// r5:4 = memd(r0++#8)
2335// }
2336// {
2337// r3 = lsr(r4, #16)
2338// r7:6 = combine(r5, r5)
2339// }
2340// {
2341// r3 = insert(r5, #16, #16)
2342// r7:6 = vlsrw(r7:6, #16)
2343// }
2344// .LBB0_2:
2345// {
2346// memh(r2+#4) = r5
2347// memh(r2+#6) = r6 # R6 is really R5.H
2348// }
2349// {
2350// r2 = add(r2, #8)
2351// memh(r2+#0) = r4
2352// memh(r2+#2) = r3 # R3 is really R4.H
2353// }
2354// {
2355// r5:4 = memd(r0++#8)
2356// }
2357// { # "Shuffling" code that sets up R3 and R6
2358// r3 = lsr(r4, #16) # so that their halves can be stored in the
2359// r7:6 = combine(r5, r5) # next iteration. This could be folded into
2360// } # the stores if the code was at the beginning
2361// { # of the loop iteration. Since the same code
2362// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
2363// r7:6 = vlsrw(r7:6, #16) # there.
2364// }:endloop0
2365//
2366//
2367// The outcome:
2368//
2369// {
2370// loop0(.LBB0_2, r1)
2371// r5:4 = memd(r0++#8)
2372// }
2373// .LBB0_2:
2374// {
2375// memh(r2+#4) = r5
2376// memh(r2+#6) = r5.h
2377// }
2378// {
2379// r2 = add(r2, #8)
2380// memh(r2+#0) = r4
2381// memh(r2+#2) = r4.h
2382// }
2383// {
2384// r5:4 = memd(r0++#8)
2385// }:endloop0
2386
2387namespace llvm {
2388 FunctionPass *createHexagonLoopRescheduling();
2389 void initializeHexagonLoopReschedulingPass(PassRegistry&);
2390}
2391
2392namespace {
2393 class HexagonLoopRescheduling : public MachineFunctionPass {
2394 public:
2395 static char ID;
2396 HexagonLoopRescheduling() : MachineFunctionPass(ID),
2397 HII(0), HRI(0), MRI(0), BTP(0) {
2398 initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
2399 }
2400
2401 bool runOnMachineFunction(MachineFunction &MF) override;
2402
2403 private:
2404 const HexagonInstrInfo *HII;
2405 const HexagonRegisterInfo *HRI;
2406 MachineRegisterInfo *MRI;
2407 BitTracker *BTP;
2408
2409 struct LoopCand {
2410 LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
2411 MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
2412 MachineBasicBlock *LB, *PB, *EB;
2413 };
2414 typedef std::vector<MachineInstr*> InstrList;
2415 struct InstrGroup {
2416 BitTracker::RegisterRef Inp, Out;
2417 InstrList Ins;
2418 };
2419 struct PhiInfo {
2420 PhiInfo(MachineInstr &P, MachineBasicBlock &B);
2421 unsigned DefR;
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002422 BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
2423 MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002424 };
2425
2426 static unsigned getDefReg(const MachineInstr *MI);
2427 bool isConst(unsigned Reg) const;
2428 bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
2429 bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
2430 bool isShuffleOf(unsigned OutR, unsigned InpR) const;
2431 bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
2432 unsigned &InpR2) const;
2433 void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
2434 MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
2435 bool processLoop(LoopCand &C);
2436 };
2437}
2438
2439char HexagonLoopRescheduling::ID = 0;
2440
2441INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
2442 "Hexagon Loop Rescheduling", false, false)
2443
2444
2445HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
2446 MachineBasicBlock &B) {
2447 DefR = HexagonLoopRescheduling::getDefReg(&P);
2448 LB = &B;
2449 PB = nullptr;
2450 for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
2451 const MachineOperand &OpB = P.getOperand(i+1);
2452 if (OpB.getMBB() == &B) {
2453 LR = P.getOperand(i);
2454 continue;
2455 }
2456 PB = OpB.getMBB();
2457 PR = P.getOperand(i);
2458 }
2459}
2460
2461
2462unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
2463 RegisterSet Defs;
2464 HBS::getInstrDefs(*MI, Defs);
2465 if (Defs.count() != 1)
2466 return 0;
2467 return Defs.find_first();
2468}
2469
2470
2471bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
2472 if (!BTP->has(Reg))
2473 return false;
2474 const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
2475 for (unsigned i = 0, w = RC.width(); i < w; ++i) {
2476 const BitTracker::BitValue &V = RC[i];
2477 if (!V.is(0) && !V.is(1))
2478 return false;
2479 }
2480 return true;
2481}
2482
2483
2484bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
2485 unsigned DefR) const {
2486 unsigned Opc = MI->getOpcode();
2487 switch (Opc) {
2488 case TargetOpcode::COPY:
2489 case Hexagon::S2_lsr_i_r:
2490 case Hexagon::S2_asr_i_r:
2491 case Hexagon::S2_asl_i_r:
2492 case Hexagon::S2_lsr_i_p:
2493 case Hexagon::S2_asr_i_p:
2494 case Hexagon::S2_asl_i_p:
2495 case Hexagon::S2_insert:
2496 case Hexagon::A2_or:
2497 case Hexagon::A2_orp:
2498 case Hexagon::A2_and:
2499 case Hexagon::A2_andp:
2500 case Hexagon::A2_combinew:
2501 case Hexagon::A4_combineri:
2502 case Hexagon::A4_combineir:
2503 case Hexagon::A2_combineii:
2504 case Hexagon::A4_combineii:
2505 case Hexagon::A2_combine_ll:
2506 case Hexagon::A2_combine_lh:
2507 case Hexagon::A2_combine_hl:
2508 case Hexagon::A2_combine_hh:
2509 return true;
2510 }
2511 return false;
2512}
2513
2514
2515bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
2516 unsigned InpR) const {
2517 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
2518 const MachineOperand &Op = MI->getOperand(i);
2519 if (!Op.isReg())
2520 continue;
2521 if (Op.getReg() == InpR)
2522 return i == n-1;
2523 }
2524 return false;
2525}
2526
2527
2528bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
2529 if (!BTP->has(OutR) || !BTP->has(InpR))
2530 return false;
2531 const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
2532 for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
2533 const BitTracker::BitValue &V = OutC[i];
2534 if (V.Type != BitTracker::BitValue::Ref)
2535 continue;
2536 if (V.RefI.Reg != InpR)
2537 return false;
2538 }
2539 return true;
2540}
2541
2542
2543bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
2544 unsigned OutR2, unsigned &InpR2) const {
2545 if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
2546 return false;
2547 const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
2548 const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
2549 unsigned W = OutC1.width();
2550 unsigned MatchR = 0;
2551 if (W != OutC2.width())
2552 return false;
2553 for (unsigned i = 0; i < W; ++i) {
2554 const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
2555 if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
2556 return false;
2557 if (V1.Type != BitTracker::BitValue::Ref)
2558 continue;
2559 if (V1.RefI.Pos != V2.RefI.Pos)
2560 return false;
2561 if (V1.RefI.Reg != InpR1)
2562 return false;
2563 if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
2564 return false;
2565 if (!MatchR)
2566 MatchR = V2.RefI.Reg;
2567 else if (V2.RefI.Reg != MatchR)
2568 return false;
2569 }
2570 InpR2 = MatchR;
2571 return true;
2572}
2573
2574
2575void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
2576 MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
2577 unsigned NewPredR) {
2578 DenseMap<unsigned,unsigned> RegMap;
2579
2580 const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
2581 unsigned PhiR = MRI->createVirtualRegister(PhiRC);
2582 BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
2583 .addReg(NewPredR)
2584 .addMBB(&PB)
2585 .addReg(G.Inp.Reg)
2586 .addMBB(&LB);
2587 RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
2588
2589 for (unsigned i = G.Ins.size(); i > 0; --i) {
2590 const MachineInstr *SI = G.Ins[i-1];
2591 unsigned DR = getDefReg(SI);
2592 const TargetRegisterClass *RC = MRI->getRegClass(DR);
2593 unsigned NewDR = MRI->createVirtualRegister(RC);
2594 DebugLoc DL = SI->getDebugLoc();
2595
2596 auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
2597 for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
2598 const MachineOperand &Op = SI->getOperand(j);
2599 if (!Op.isReg()) {
2600 MIB.addOperand(Op);
2601 continue;
2602 }
2603 if (!Op.isUse())
2604 continue;
2605 unsigned UseR = RegMap[Op.getReg()];
2606 MIB.addReg(UseR, 0, Op.getSubReg());
2607 }
2608 RegMap.insert(std::make_pair(DR, NewDR));
2609 }
2610
2611 HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
2612}
2613
2614
2615bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
2616 DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
2617 std::vector<PhiInfo> Phis;
2618 for (auto &I : *C.LB) {
2619 if (!I.isPHI())
2620 break;
2621 unsigned PR = getDefReg(&I);
2622 if (isConst(PR))
2623 continue;
2624 bool BadUse = false, GoodUse = false;
2625 for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
2626 MachineInstr *UseI = UI->getParent();
2627 if (UseI->getParent() != C.LB) {
2628 BadUse = true;
2629 break;
2630 }
2631 if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
2632 GoodUse = true;
2633 }
2634 if (BadUse || !GoodUse)
2635 continue;
2636
2637 Phis.push_back(PhiInfo(I, *C.LB));
2638 }
2639
2640 DEBUG({
2641 dbgs() << "Phis: {";
2642 for (auto &I : Phis) {
2643 dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
2644 << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
2645 << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
2646 << I.LB->getNumber() << ')';
2647 }
2648 dbgs() << " }\n";
2649 });
2650
2651 if (Phis.empty())
2652 return false;
2653
2654 bool Changed = false;
2655 InstrList ShufIns;
2656
2657 // Go backwards in the block: for each bit shuffling instruction, check
2658 // if that instruction could potentially be moved to the front of the loop:
2659 // the output of the loop cannot be used in a non-shuffling instruction
2660 // in this loop.
2661 for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
2662 if (I->isTerminator())
2663 continue;
2664 if (I->isPHI())
2665 break;
2666
2667 RegisterSet Defs;
2668 HBS::getInstrDefs(*I, Defs);
2669 if (Defs.count() != 1)
2670 continue;
2671 unsigned DefR = Defs.find_first();
2672 if (!TargetRegisterInfo::isVirtualRegister(DefR))
2673 continue;
2674 if (!isBitShuffle(&*I, DefR))
2675 continue;
2676
2677 bool BadUse = false;
2678 for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
2679 MachineInstr *UseI = UI->getParent();
2680 if (UseI->getParent() == C.LB) {
2681 if (UseI->isPHI()) {
2682 // If the use is in a phi node in this loop, then it should be
2683 // the value corresponding to the back edge.
2684 unsigned Idx = UI.getOperandNo();
2685 if (UseI->getOperand(Idx+1).getMBB() != C.LB)
2686 BadUse = true;
2687 } else {
David Majnemer0d955d02016-08-11 22:21:41 +00002688 auto F = find(ShufIns, UseI);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002689 if (F == ShufIns.end())
2690 BadUse = true;
2691 }
2692 } else {
2693 // There is a use outside of the loop, but there is no epilog block
2694 // suitable for a copy-out.
2695 if (C.EB == nullptr)
2696 BadUse = true;
2697 }
2698 if (BadUse)
2699 break;
2700 }
2701
2702 if (BadUse)
2703 continue;
2704 ShufIns.push_back(&*I);
2705 }
2706
2707 // Partition the list of shuffling instructions into instruction groups,
2708 // where each group has to be moved as a whole (i.e. a group is a chain of
2709 // dependent instructions). A group produces a single live output register,
2710 // which is meant to be the input of the loop phi node (although this is
2711 // not checked here yet). It also uses a single register as its input,
2712 // which is some value produced in the loop body. After moving the group
2713 // to the beginning of the loop, that input register would need to be
2714 // the loop-carried register (through a phi node) instead of the (currently
2715 // loop-carried) output register.
2716 typedef std::vector<InstrGroup> InstrGroupList;
2717 InstrGroupList Groups;
2718
2719 for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
2720 MachineInstr *SI = ShufIns[i];
2721 if (SI == nullptr)
2722 continue;
2723
2724 InstrGroup G;
2725 G.Ins.push_back(SI);
2726 G.Out.Reg = getDefReg(SI);
2727 RegisterSet Inputs;
2728 HBS::getInstrUses(*SI, Inputs);
2729
2730 for (unsigned j = i+1; j < n; ++j) {
2731 MachineInstr *MI = ShufIns[j];
2732 if (MI == nullptr)
2733 continue;
2734 RegisterSet Defs;
2735 HBS::getInstrDefs(*MI, Defs);
2736 // If this instruction does not define any pending inputs, skip it.
2737 if (!Defs.intersects(Inputs))
2738 continue;
2739 // Otherwise, add it to the current group and remove the inputs that
2740 // are defined by MI.
2741 G.Ins.push_back(MI);
2742 Inputs.remove(Defs);
2743 // Then add all registers used by MI.
2744 HBS::getInstrUses(*MI, Inputs);
2745 ShufIns[j] = nullptr;
2746 }
2747
2748 // Only add a group if it requires at most one register.
2749 if (Inputs.count() > 1)
2750 continue;
2751 auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
2752 return G.Out.Reg == P.LR.Reg;
2753 };
David Majnemer562e8292016-08-12 00:18:03 +00002754 if (find_if(Phis, LoopInpEq) == Phis.end())
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002755 continue;
2756
2757 G.Inp.Reg = Inputs.find_first();
2758 Groups.push_back(G);
2759 }
2760
2761 DEBUG({
2762 for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
2763 InstrGroup &G = Groups[i];
2764 dbgs() << "Group[" << i << "] inp: "
2765 << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
2766 << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
2767 for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
2768 dbgs() << " " << *G.Ins[j];
2769 }
2770 });
2771
2772 for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
2773 InstrGroup &G = Groups[i];
2774 if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
2775 continue;
2776 auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
2777 return G.Out.Reg == P.LR.Reg;
2778 };
David Majnemer562e8292016-08-12 00:18:03 +00002779 auto F = find_if(Phis, LoopInpEq);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002780 if (F == Phis.end())
2781 continue;
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002782 unsigned PrehR = 0;
2783 if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
2784 const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
2785 unsigned Opc = DefPrehR->getOpcode();
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002786 if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
2787 continue;
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002788 if (!DefPrehR->getOperand(1).isImm())
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002789 continue;
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002790 if (DefPrehR->getOperand(1).getImm() != 0)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002791 continue;
2792 const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
2793 if (RC != MRI->getRegClass(F->PR.Reg)) {
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002794 PrehR = MRI->createVirtualRegister(RC);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002795 unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
2796 : Hexagon::A2_tfrpi;
2797 auto T = C.PB->getFirstTerminator();
2798 DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002799 BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002800 .addImm(0);
2801 } else {
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002802 PrehR = F->PR.Reg;
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002803 }
2804 }
Krzysztof Parzyszek57c3ddd2016-07-26 19:17:13 +00002805 // isSameShuffle could match with PrehR being of a wider class than
2806 // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
2807 // it would match for the input being a 32-bit register, and PrehR
2808 // being a 64-bit register (where the low 32 bits match). This could
2809 // be handled, but for now skip these cases.
2810 if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
2811 continue;
2812 moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002813 Changed = true;
2814 }
2815
2816 return Changed;
2817}
2818
2819
2820bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
Andrew Kaylor5b444a22016-04-26 19:46:28 +00002821 if (skipFunction(*MF.getFunction()))
2822 return false;
2823
Krzysztof Parzyszekced99412015-10-20 22:57:13 +00002824 auto &HST = MF.getSubtarget<HexagonSubtarget>();
2825 HII = HST.getInstrInfo();
2826 HRI = HST.getRegisterInfo();
2827 MRI = &MF.getRegInfo();
2828 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
2829 BitTracker BT(HE, MF);
2830 DEBUG(BT.trace(true));
2831 BT.run();
2832 BTP = &BT;
2833
2834 std::vector<LoopCand> Cand;
2835
2836 for (auto &B : MF) {
2837 if (B.pred_size() != 2 || B.succ_size() != 2)
2838 continue;
2839 MachineBasicBlock *PB = nullptr;
2840 bool IsLoop = false;
2841 for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
2842 if (*PI != &B)
2843 PB = *PI;
2844 else
2845 IsLoop = true;
2846 }
2847 if (!IsLoop)
2848 continue;
2849
2850 MachineBasicBlock *EB = nullptr;
2851 for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
2852 if (*SI == &B)
2853 continue;
2854 // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
2855 // edge from B to EP is non-critical.
2856 if ((*SI)->pred_size() == 1)
2857 EB = *SI;
2858 break;
2859 }
2860
2861 Cand.push_back(LoopCand(&B, PB, EB));
2862 }
2863
2864 bool Changed = false;
2865 for (auto &C : Cand)
2866 Changed |= processLoop(C);
2867
2868 return Changed;
2869}
2870
2871//===----------------------------------------------------------------------===//
2872// Public Constructor Functions
2873//===----------------------------------------------------------------------===//
2874
2875FunctionPass *llvm::createHexagonLoopRescheduling() {
2876 return new HexagonLoopRescheduling();
2877}
2878
2879FunctionPass *llvm::createHexagonBitSimplify() {
2880 return new HexagonBitSimplify();
2881}
2882