//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
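/// Fold the source operand of mov and copy instructions into the instructions
/// that use their results, where doing so is legal.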
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

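// Opcodes whose source operand may be folded into their uses: plain
// full-register moves and copies, where operand 1 is the value being
// propagated.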
static bool isSafeToFold(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

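// Replace operand OpNo of MI with the folded value: a regular immediate, an
// FP immediate, or another virtual register. Returns true if the operand was
// rewritten.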
static bool updateOperand(MachineInstr *MI, unsigned OpNo,
                          const MachineOperand &New,
                          const TargetRegisterInfo &TRI) {
  MachineOperand &Old = MI->getOperand(OpNo);
  assert(Old.isReg());

  if (New.isImm()) {
    Old.ChangeToImmediate(New.getImm());
    return true;
  }

  if (New.isFPImm()) {
    Old.ChangeToFPImmediate(New.getFPImm());
    return true;
  }

  if (New.isReg()) {
    if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
        TargetRegisterInfo::isVirtualRegister(New.getReg())) {
      Old.substVirtReg(New.getReg(), New.getSubReg(), TRI);
      return true;
    }
  }

  // FIXME: Handle physical registers.

  return false;
}

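// Scan every instruction in the function. For each foldable mov/copy, collect
// the uses of its destination register that can legally accept the source
// operand directly, then rewrite those uses in place.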
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      MachineOperand &OpToFold = MI.getOperand(1);

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

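      // Uses of the mov/copy's destination that can legally take OpToFold.
      // They are collected here and rewritten only after the use scan below
      // completes.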
      std::vector<std::pair<MachineInstr *, unsigned>> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && UseOp.getSubReg()) {
          continue;
        }

        // In order to fold immediates into copies, we need to change the
        // copy to a MOV.
        if ((OpToFold.isImm() || OpToFold.isFPImm()) &&
            UseMI->getOpcode() == AMDGPU::COPY) {
          const TargetRegisterClass *TRC =
              MRI.getRegClass(UseMI->getOperand(0).getReg());

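          // Choose a mov opcode that matches the width and register bank of
          // the copy's destination class; classes that are not handled here
          // are skipped.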
          if (TRC->getSize() == 4) {
            if (TRI.isSGPRClass(TRC))
              UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
            else
              UseMI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
          } else if (TRC->getSize() == 8 && TRI.isSGPRClass(TRC)) {
            UseMI->setDesc(TII->get(AMDGPU::S_MOV_B64));
          } else {
            continue;
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes. Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        // Normal substitution.
        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
          FoldList.push_back(std::make_pair(UseMI, Use.getOperandNo()));
          continue;
        }

        // FIXME: We could commute the instruction to create more opportunities
        // for folding. This will only be useful if we have 32-bit instructions.

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities. The shrink operands pass
        // already does this.
      }

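      // Apply the folds gathered above. Folds are recorded first and applied
      // only now, so the use list is not modified while it is still being
      // iterated.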
      for (std::pair<MachineInstr *, unsigned> Fold : FoldList) {
        if (updateOperand(Fold.first, Fold.second, OpToFold, TRI)) {
          // Clear kill flags.
          if (OpToFold.isReg())
            OpToFold.setIsKill(false);
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.second << " of " << *Fold.first << '\n');
        }
      }
    }
  }
  return false;
}