blob: 3e456c236acf522f15749805a77f5b04a53d8150 [file] [log] [blame]
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +00001//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the SSEDomainFix pass.
11//
12// Some SSE instructions like mov, and, or, xor are available in different
13// variants for different operand types. These variant instructions are
14// equivalent, but on Nehalem and newer cpus there is extra latency
15// transferring data between integer and floating point domains.
16//
17// This pass changes the variant instructions to minimize domain crossings.
18//
19//===----------------------------------------------------------------------===//
20
21#define DEBUG_TYPE "sse-domain-fix"
22#include "X86InstrInfo.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/ADT/DepthFirstIterator.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31namespace {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +000032
33/// Allocate objects from a pool, allow objects to be recycled, and provide a
34/// way of deleting everything.
35template<typename T, unsigned PageSize = 64>
36class PoolAllocator {
37 std::vector<T*> Pages, Avail;
38public:
39 ~PoolAllocator() { Clear(); }
40
41 T* Alloc() {
42 if (Avail.empty()) {
43 T *p = new T[PageSize];
44 Pages.push_back(p);
45 Avail.reserve(PageSize);
46 for (unsigned n = 0; n != PageSize; ++n)
47 Avail.push_back(p+n);
48 }
49 T *p = Avail.back();
50 Avail.pop_back();
51 return p;
52 }
53
54 // Allow object to be reallocated. It won't be reconstructed.
55 void Recycle(T *p) {
56 p->clear();
57 Avail.push_back(p);
58 }
59
60 // Destroy all objects, make sure there are no external pointers to them.
61 void Clear() {
62 Avail.clear();
63 while (!Pages.empty()) {
64 delete[] Pages.back();
65 Pages.pop_back();
66 }
67 }
68};
69
70/// A DomainValue is a bit like LiveIntervals' ValNo, but it laso keeps track
71/// of execution domains.
72///
73/// An open DomainValue represents a set of instructions that can still switch
74/// execution domain. Multiple registers may refer to the same open
75/// DomainValue - they will eventually be collapsed to the same execution
76/// domain.
77///
78/// A collapsed DomainValue represents a single register that has been forced
79/// into one of more execution domains. There is a separate collapsed
80/// DomainValue for each register, but it may contain multiple execution
81/// domains. A register value is initially created in a single execution
82/// domain, but if we were forced to pay the penalty of a domain crossing, we
83/// keep track of the fact the the register is now available in multiple
84/// domains.
85struct DomainValue {
86 // Basic reference counting.
87 unsigned Refs;
88
89 // Available domains. For an open DomainValue, it is the still possible
90 // domains for collapsing. For a collapsed DomainValue it is the domains where
91 // the register is available for free.
92 unsigned Mask;
93
94 // Position of the last defining instruction.
95 unsigned Dist;
96
97 // Twiddleable instructions using or defining these registers.
98 SmallVector<MachineInstr*, 8> Instrs;
99
100 // Collapsed DomainValue have no instructions to twiddle - it simply keeps
101 // track of the domains where the registers are already available.
102 bool collapsed() const { return Instrs.empty(); }
103
104 // Is any domain in mask available?
105 bool compat(unsigned mask) const {
106 return Mask & mask;
107 }
108
109 // Mark domain as available
110 void add(unsigned domain) {
111 Mask |= 1u << domain;
112 }
113
114 DomainValue() { clear(); }
115
116 void clear() {
117 Refs = Mask = Dist = 0;
118 Instrs.clear();
119 }
120};
121
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000122class SSEDomainFixPass : public MachineFunctionPass {
123 static char ID;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000124 PoolAllocator<DomainValue> Pool;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000125
126 MachineFunction *MF;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000127 const X86InstrInfo *TII;
128 const TargetRegisterInfo *TRI;
129
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000130 MachineBasicBlock *MBB;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000131 bool hasLiveRegs;
132 DomainValue *LiveRegs[16];
133
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000134public:
135 SSEDomainFixPass() : MachineFunctionPass(&ID) {}
136
137 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
138 AU.setPreservesAll();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }
141
142 virtual bool runOnMachineFunction(MachineFunction &MF);
143
144 virtual const char *getPassName() const {
145 return "SSE execution domain fixup";
146 }
147
148private:
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000149 // Register mapping.
150 int RegIndex(unsigned Reg);
151
152 // LiveRegs manipulations.
153 void SetLiveReg(int rx, DomainValue *DV);
154 void Kill(int rx);
155 void Force(int rx, unsigned domain);
156 void Collapse(DomainValue *dv, unsigned domain);
157 bool Merge(DomainValue *A, DomainValue *B);
158
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000159 void enterBasicBlock(MachineBasicBlock *MBB);
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000160 void visitGenericInstr(MachineInstr*);
161 void visitSoftInstr(MachineInstr*, unsigned mask);
162 void visitHardInstr(MachineInstr*, unsigned domain);
163
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000164};
165}
166
167char SSEDomainFixPass::ID = 0;
168
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000169/// Translate TRI register number to an index into our smaller tables of
170/// interesting registers. Return -1 for boring registers.
171int SSEDomainFixPass::RegIndex(unsigned reg) {
172 // Registers are sorted lexicographically.
173 // We just need them to be consecutive, ordering doesn't matter.
174 assert(X86::XMM9 == X86::XMM0+15 && "Unexpected sort");
175 reg -= X86::XMM0;
176 return reg < 16 ? reg : -1;
177}
178
179/// Set LiveRegs[rx] = dv, updating reference counts.
180void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
181 if (LiveRegs[rx] == dv)
182 return;
183 if (LiveRegs[rx]) {
184 assert(LiveRegs[rx]->Refs && "Bad refcount");
185 if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
186 }
187 LiveRegs[rx] = dv;
188 if (dv) ++dv->Refs;
189}
190
191// Kill register rx, recycle or collapse any DomainValue.
192void SSEDomainFixPass::Kill(int rx) {
193 if (!LiveRegs[rx]) return;
194
195 // Before killing the last reference to an open DomainValue, collapse it to
196 // the first available domain.
197 if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
198 Collapse(LiveRegs[rx], CountTrailingZeros_32(LiveRegs[rx]->Mask));
199 else
200 SetLiveReg(rx, 0);
201}
202
203/// Force register rx into domain.
204void SSEDomainFixPass::Force(int rx, unsigned domain) {
205 hasLiveRegs = true;
206 if (DomainValue *dv = LiveRegs[rx]) {
207 if (dv->collapsed())
208 dv->add(domain);
209 else
210 Collapse(dv, domain);
211 } else {
212 // Set up basic collapsed DomainValue
213 DomainValue *dv = Pool.Alloc();
214 dv->add(domain);
215 SetLiveReg(rx, dv);
216 }
217}
218
219/// Collapse open DomainValue into given domain. If there are multiple
220/// registers using dv, they each get a unique collapsed DomainValue.
221void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
222 assert(dv->compat(1u << domain) && "Cannot collapse");
223
224 // Collapse all the instructions.
225 while (!dv->Instrs.empty()) {
226 MachineInstr *mi = dv->Instrs.back();
227 TII->SetSSEDomain(mi, domain);
228 dv->Instrs.pop_back();
229 }
230 dv->Mask = 1u << domain;
231
232 // If there are multiple users, give them new, unique DomainValues.
233 if (dv->Refs > 1) {
234 for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
235 if (LiveRegs[rx] == dv) {
236 DomainValue *dv2 = Pool.Alloc();
237 dv2->add(domain);
238 SetLiveReg(rx, dv2);
239 }
240 Pool.Recycle(dv);
241 }
242}
243
244/// Merge - All instructions and registers in B are moved to A, and B is
245/// released.
246bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
247 assert(!A->collapsed() && "Cannot merge into collapsed");
248 assert(!B->collapsed() && "Cannot merge from collapsed");
249 if (!A->compat(B->Mask))
250 return false;
251 A->Mask &= B->Mask;
252 A->Dist = std::max(A->Dist, B->Dist);
253 A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
254 for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
255 if (LiveRegs[rx] == B)
256 SetLiveReg(rx, A);
257 return true;
258}
259
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000260void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
261 MBB = mbb;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000262}
263
264// A hard instruction only works in one domain. All input registers will be
265// forced into that domain.
266void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
267 // Collapse all uses.
268 for (unsigned i = mi->getDesc().getNumDefs(),
269 e = mi->getDesc().getNumOperands(); i != e; ++i) {
270 MachineOperand &mo = mi->getOperand(i);
271 if (!mo.isReg()) continue;
272 int rx = RegIndex(mo.getReg());
273 if (rx < 0) continue;
274 Force(rx, domain);
275 }
276
277 // Kill all defs and force them.
278 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
279 MachineOperand &mo = mi->getOperand(i);
280 if (!mo.isReg()) continue;
281 int rx = RegIndex(mo.getReg());
282 if (rx < 0) continue;
283 Kill(rx);
284 Force(rx, domain);
285 }
286}
287
288// A soft instruction can be changed to work in other domains given by mask.
289void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
290 // Scan the explicit use operands for incoming domains.
291 unsigned collmask = mask;
292 SmallVector<int, 4> used;
293 for (unsigned i = mi->getDesc().getNumDefs(),
294 e = mi->getDesc().getNumOperands(); i != e; ++i) {
295 MachineOperand &mo = mi->getOperand(i);
296 if (!mo.isReg()) continue;
297 int rx = RegIndex(mo.getReg());
298 if (rx < 0) continue;
299 if (DomainValue *dv = LiveRegs[rx]) {
300 // Is it possible to use this collapsed register for free?
301 if (dv->collapsed()) {
302 if (unsigned m = collmask & dv->Mask)
303 collmask = m;
304 } else if (dv->compat(collmask))
305 used.push_back(rx);
306 else
307 Kill(rx);
308 }
309 }
310
311 // If the collapsed operands force a single domain, propagate the collapse.
312 if (isPowerOf2_32(collmask)) {
313 unsigned domain = CountTrailingZeros_32(collmask);
314 TII->SetSSEDomain(mi, domain);
315 visitHardInstr(mi, domain);
316 return;
317 }
318
319 // Kill off any remaining uses that don't match collmask, and build a list of
320 // incoming DomainValue that we want to merge.
321 SmallVector<DomainValue*,4> doms;
322 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
323 int rx = *i;
324 DomainValue *dv = LiveRegs[rx];
325 // This useless DomainValue could have been missed above
326 if (!dv->compat(collmask)) {
327 Kill(*i);
328 continue;
329 }
330 // sorted, uniqued insert.
331 bool inserted = false;
332 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
333 i != e && !inserted; ++i) {
334 if (dv == *i)
335 inserted = true;
336 else if (dv->Dist < (*i)->Dist) {
337 inserted = true;
338 doms.insert(i, dv);
339 }
340 }
341 if (!inserted)
342 doms.push_back(dv);
343 }
344
345 // doms are now sorted in order of appearance. Try to merge them all, giving
346 // priority to the latest ones.
347 DomainValue *dv = 0;
348 while (!doms.empty()) {
349 if (!dv)
350 dv = doms.back();
351 else if (!Merge(dv, doms.back()))
352 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
353 if (LiveRegs[*i] == doms.back())
354 Kill(*i);
355 doms.pop_back();
356 }
357
358 // dv is the DomainValue we are going to use for this instruction.
359 if (!dv)
360 dv = Pool.Alloc();
361 dv->Mask = collmask;
362 dv->Instrs.push_back(mi);
363
364 // Finally set all defs and non-collapsed uses to dv.
365 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
366 MachineOperand &mo = mi->getOperand(i);
367 if (!mo.isReg()) continue;
368 int rx = RegIndex(mo.getReg());
369 if (rx < 0) continue;
370 if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
371 Kill(rx);
372 SetLiveReg(rx, dv);
373 }
374 }
375}
376
377void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
378 // Process explicit defs, kill any XMM registers redefined
379 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
380 MachineOperand &mo = mi->getOperand(i);
381 if (!mo.isReg()) continue;
382 int rx = RegIndex(mo.getReg());
383 if (rx < 0) continue;
384 Kill(rx);
385 }
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000386}
387
388bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
389 MF = &mf;
390 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000391 TRI = MF->getTarget().getRegisterInfo();
392 MBB = 0;
393
394 hasLiveRegs = false;
395 for (unsigned i=0, e = array_lengthof(LiveRegs); i != e; ++i)
396 LiveRegs[i] = 0;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000397
398 // If no XMM registers are used in the function, we can skip it completely.
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000399 bool anyregs = false;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000400 for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
401 E = X86::VR128RegClass.end(); I != E; ++I)
402 if (MF->getRegInfo().isPhysRegUsed(*I)) {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000403 anyregs = true;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000404 break;
405 }
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000406 if (!anyregs) return false;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000407
408 MachineBasicBlock *Entry = MF->begin();
409 SmallPtrSet<MachineBasicBlock*, 16> Visited;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000410 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000411 DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000412 DFI != DFE; ++DFI) {
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000413 enterBasicBlock(*DFI);
414 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
415 ++I) {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000416 MachineInstr *mi = I;
417 if (mi->isDebugValue()) continue;
418 std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
419 if (domp.first)
420 if (domp.second)
421 visitSoftInstr(mi, domp.second);
422 else
423 visitHardInstr(mi, domp.first);
424 else if (hasLiveRegs)
425 visitGenericInstr(mi);
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000426 }
427 }
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000428
429 Pool.Clear();
430
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000431 return false;
432}
433
434FunctionPass *llvm::createSSEDomainFixPass() {
435 return new SSEDomainFixPass();
436}