blob: bdb463f9de5a1049e3851d5412915d6b62293749 [file] [log] [blame]
Jingyue Wu9c711502015-06-24 20:20:16 +00001//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// In NVPTX, NVPTXFrameLowering emits the following instructions at the
11// beginning of a MachineFunction.
12//
13// mov %SPL, %depot
14// cvta.local %SP, %SPL
15//
16// Because a frame index is a generic address and alloca can only return a
17// generic pointer, without this pass the instructions producing an alloca'ed
18// address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
19// and loads on this address with their .local versions, but this may
20// introduce a lot of cvta.to.local instructions. Performance can be improved
21// if we avoid casting the address back and forth and directly calculate the
22// local address based on %SPL. This peephole pass optimizes these cases.
23//
24// For example, it will transform the following pattern
25// %vreg0<def> = LEA_ADDRi64 <fi#0>, 4
26// %vreg1<def> = cvta_to_local_yes_64 %vreg0
27//
28// into
29// %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
30//
31// %VRFrameLocal is the virtual register name of %SPL
32//
33//===----------------------------------------------------------------------===//
34
35#include "NVPTX.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/MachineFrameInfo.h"
40#include "llvm/Target/TargetRegisterInfo.h"
41#include "llvm/Target/TargetInstrInfo.h"
42
43using namespace llvm;
44
45#define DEBUG_TYPE "nvptx-peephole"
46
47namespace llvm {
48void initializeNVPTXPeepholePass(PassRegistry &);
49}
50
51namespace {
52struct NVPTXPeephole : public MachineFunctionPass {
53 public:
54 static char ID;
55 NVPTXPeephole() : MachineFunctionPass(ID) {
56 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
57 }
58
59 bool runOnMachineFunction(MachineFunction &MF) override;
60
61 const char *getPassName() const override {
62 return "NVPTX optimize redundant cvta.to.local instruction";
63 }
64
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
66 MachineFunctionPass::getAnalysisUsage(AU);
67 }
68};
69}
70
71char NVPTXPeephole::ID = 0;
72
73INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
74
75static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
76 auto &MBB = *Root.getParent();
77 auto &MF = *MBB.getParent();
78 // Check current instruction is cvta.to.local
79 if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
80 Root.getOpcode() != NVPTX::cvta_to_local_yes)
81 return false;
82
83 auto &Op = Root.getOperand(1);
84 const auto &MRI = MF.getRegInfo();
85 MachineInstr *GenericAddrDef = nullptr;
86 if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
87 GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
88 }
89
90 // Check the register operand is uniquely defined by LEA_ADDRi instruction
91 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
92 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
93 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
94 return false;
95 }
96
97 // Check the LEA_ADDRi operand is Frame index
98 auto &BaseAddrOp = GenericAddrDef->getOperand(1);
99 if (BaseAddrOp.getType() == MachineOperand::MO_FrameIndex) {
100 return true;
101 }
102
103 return false;
104}
105
106static void CombineCVTAToLocal(MachineInstr &Root) {
107 auto &MBB = *Root.getParent();
108 auto &MF = *MBB.getParent();
109 const auto &MRI = MF.getRegInfo();
110 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
111 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
112
113 // Get the correct offset
114 int FrameIndex = Prev.getOperand(1).getIndex();
115 int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
116 Prev.getOperand(2).getImm();
117
118 MachineInstrBuilder MIB =
119 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
120 Root.getOperand(0).getReg())
121 .addReg(NVPTX::VRFrameLocal)
122 .addOperand(MachineOperand::CreateImm(Offset));
123
124 MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
125
126 // Check if MRI has only one non dbg use, which is Root
127 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
128 Prev.eraseFromParentAndMarkDBGValuesForRemoval();
129 }
130 Root.eraseFromParentAndMarkDBGValuesForRemoval();
131}
132
133bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
134 bool Changed = false;
135 // Loop over all of the basic blocks.
136 for (auto &MBB : MF) {
137 // Traverse the basic block.
138 auto BlockIter = MBB.begin();
139
140 while (BlockIter != MBB.end()) {
141 auto &MI = *BlockIter++;
142 if (isCVTAToLocalCombinationCandidate(MI)) {
143 CombineCVTAToLocal(MI);
144 Changed = true;
145 }
146 } // Instruction
147 } // Basic Block
148 return Changed;
149}
150
151MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }