blob: 415c1e89835d7eeb51f1da8d9d18e330da60ad64 [file] [log] [blame]
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +00001//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the SSEDomainFix pass.
11//
12// Some SSE instructions like mov, and, or, xor are available in different
13// variants for different operand types. These variant instructions are
14// equivalent, but on Nehalem and newer cpus there is extra latency
15// transferring data between integer and floating point domains.
16//
17// This pass changes the variant instructions to minimize domain crossings.
18//
19//===----------------------------------------------------------------------===//
20
21#define DEBUG_TYPE "sse-domain-fix"
22#include "X86InstrInfo.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/ADT/DepthFirstIterator.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
31namespace {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +000032
33/// Allocate objects from a pool, allow objects to be recycled, and provide a
34/// way of deleting everything.
35template<typename T, unsigned PageSize = 64>
36class PoolAllocator {
37 std::vector<T*> Pages, Avail;
38public:
39 ~PoolAllocator() { Clear(); }
40
41 T* Alloc() {
42 if (Avail.empty()) {
43 T *p = new T[PageSize];
44 Pages.push_back(p);
45 Avail.reserve(PageSize);
46 for (unsigned n = 0; n != PageSize; ++n)
47 Avail.push_back(p+n);
48 }
49 T *p = Avail.back();
50 Avail.pop_back();
51 return p;
52 }
53
54 // Allow object to be reallocated. It won't be reconstructed.
55 void Recycle(T *p) {
56 p->clear();
57 Avail.push_back(p);
58 }
59
60 // Destroy all objects, make sure there are no external pointers to them.
61 void Clear() {
62 Avail.clear();
63 while (!Pages.empty()) {
64 delete[] Pages.back();
65 Pages.pop_back();
66 }
67 }
68};
69
70/// A DomainValue is a bit like LiveIntervals' ValNo, but it laso keeps track
71/// of execution domains.
72///
73/// An open DomainValue represents a set of instructions that can still switch
74/// execution domain. Multiple registers may refer to the same open
75/// DomainValue - they will eventually be collapsed to the same execution
76/// domain.
77///
78/// A collapsed DomainValue represents a single register that has been forced
79/// into one of more execution domains. There is a separate collapsed
80/// DomainValue for each register, but it may contain multiple execution
81/// domains. A register value is initially created in a single execution
82/// domain, but if we were forced to pay the penalty of a domain crossing, we
83/// keep track of the fact the the register is now available in multiple
84/// domains.
85struct DomainValue {
86 // Basic reference counting.
87 unsigned Refs;
88
89 // Available domains. For an open DomainValue, it is the still possible
90 // domains for collapsing. For a collapsed DomainValue it is the domains where
91 // the register is available for free.
92 unsigned Mask;
93
94 // Position of the last defining instruction.
95 unsigned Dist;
96
97 // Twiddleable instructions using or defining these registers.
98 SmallVector<MachineInstr*, 8> Instrs;
99
100 // Collapsed DomainValue have no instructions to twiddle - it simply keeps
101 // track of the domains where the registers are already available.
102 bool collapsed() const { return Instrs.empty(); }
103
104 // Is any domain in mask available?
105 bool compat(unsigned mask) const {
106 return Mask & mask;
107 }
108
109 // Mark domain as available
110 void add(unsigned domain) {
111 Mask |= 1u << domain;
112 }
113
114 DomainValue() { clear(); }
115
116 void clear() {
117 Refs = Mask = Dist = 0;
118 Instrs.clear();
119 }
120};
121
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000122class SSEDomainFixPass : public MachineFunctionPass {
123 static char ID;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000124 PoolAllocator<DomainValue> Pool;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000125
126 MachineFunction *MF;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000127 const X86InstrInfo *TII;
128 const TargetRegisterInfo *TRI;
129
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000130 MachineBasicBlock *MBB;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000131 bool hasLiveRegs;
132 DomainValue *LiveRegs[16];
133
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000134public:
135 SSEDomainFixPass() : MachineFunctionPass(&ID) {}
136
137 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
138 AU.setPreservesAll();
139 MachineFunctionPass::getAnalysisUsage(AU);
140 }
141
142 virtual bool runOnMachineFunction(MachineFunction &MF);
143
144 virtual const char *getPassName() const {
145 return "SSE execution domain fixup";
146 }
147
148private:
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000149 // Register mapping.
150 int RegIndex(unsigned Reg);
151
152 // LiveRegs manipulations.
153 void SetLiveReg(int rx, DomainValue *DV);
154 void Kill(int rx);
155 void Force(int rx, unsigned domain);
156 void Collapse(DomainValue *dv, unsigned domain);
157 bool Merge(DomainValue *A, DomainValue *B);
158
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000159 void enterBasicBlock(MachineBasicBlock *MBB);
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000160 void visitGenericInstr(MachineInstr*);
161 void visitSoftInstr(MachineInstr*, unsigned mask);
162 void visitHardInstr(MachineInstr*, unsigned domain);
163
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000164};
165}
166
167char SSEDomainFixPass::ID = 0;
168
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000169/// Translate TRI register number to an index into our smaller tables of
170/// interesting registers. Return -1 for boring registers.
171int SSEDomainFixPass::RegIndex(unsigned reg) {
172 // Registers are sorted lexicographically.
173 // We just need them to be consecutive, ordering doesn't matter.
174 assert(X86::XMM9 == X86::XMM0+15 && "Unexpected sort");
175 reg -= X86::XMM0;
176 return reg < 16 ? reg : -1;
177}
178
179/// Set LiveRegs[rx] = dv, updating reference counts.
180void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
181 if (LiveRegs[rx] == dv)
182 return;
183 if (LiveRegs[rx]) {
184 assert(LiveRegs[rx]->Refs && "Bad refcount");
185 if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
186 }
187 LiveRegs[rx] = dv;
188 if (dv) ++dv->Refs;
189}
190
191// Kill register rx, recycle or collapse any DomainValue.
192void SSEDomainFixPass::Kill(int rx) {
193 if (!LiveRegs[rx]) return;
194
195 // Before killing the last reference to an open DomainValue, collapse it to
196 // the first available domain.
197 if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
198 Collapse(LiveRegs[rx], CountTrailingZeros_32(LiveRegs[rx]->Mask));
199 else
200 SetLiveReg(rx, 0);
201}
202
203/// Force register rx into domain.
204void SSEDomainFixPass::Force(int rx, unsigned domain) {
205 hasLiveRegs = true;
Jakob Stoklund Olesend77f8182010-03-30 00:09:32 +0000206 DomainValue *dv;
207 if ((dv = LiveRegs[rx])) {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000208 if (dv->collapsed())
209 dv->add(domain);
210 else
211 Collapse(dv, domain);
212 } else {
213 // Set up basic collapsed DomainValue
Jakob Stoklund Olesend77f8182010-03-30 00:09:32 +0000214 dv = Pool.Alloc();
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000215 dv->add(domain);
216 SetLiveReg(rx, dv);
217 }
218}
219
220/// Collapse open DomainValue into given domain. If there are multiple
221/// registers using dv, they each get a unique collapsed DomainValue.
222void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
223 assert(dv->compat(1u << domain) && "Cannot collapse");
224
225 // Collapse all the instructions.
226 while (!dv->Instrs.empty()) {
227 MachineInstr *mi = dv->Instrs.back();
228 TII->SetSSEDomain(mi, domain);
229 dv->Instrs.pop_back();
230 }
231 dv->Mask = 1u << domain;
232
233 // If there are multiple users, give them new, unique DomainValues.
234 if (dv->Refs > 1) {
235 for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
236 if (LiveRegs[rx] == dv) {
237 DomainValue *dv2 = Pool.Alloc();
238 dv2->add(domain);
239 SetLiveReg(rx, dv2);
240 }
241 Pool.Recycle(dv);
242 }
243}
244
245/// Merge - All instructions and registers in B are moved to A, and B is
246/// released.
247bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
248 assert(!A->collapsed() && "Cannot merge into collapsed");
249 assert(!B->collapsed() && "Cannot merge from collapsed");
250 if (!A->compat(B->Mask))
251 return false;
252 A->Mask &= B->Mask;
253 A->Dist = std::max(A->Dist, B->Dist);
254 A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
255 for (unsigned rx=0, e = array_lengthof(LiveRegs); rx != e; ++rx)
256 if (LiveRegs[rx] == B)
257 SetLiveReg(rx, A);
258 return true;
259}
260
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000261void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
262 MBB = mbb;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000263}
264
265// A hard instruction only works in one domain. All input registers will be
266// forced into that domain.
267void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
268 // Collapse all uses.
269 for (unsigned i = mi->getDesc().getNumDefs(),
270 e = mi->getDesc().getNumOperands(); i != e; ++i) {
271 MachineOperand &mo = mi->getOperand(i);
272 if (!mo.isReg()) continue;
273 int rx = RegIndex(mo.getReg());
274 if (rx < 0) continue;
275 Force(rx, domain);
276 }
277
278 // Kill all defs and force them.
279 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
280 MachineOperand &mo = mi->getOperand(i);
281 if (!mo.isReg()) continue;
282 int rx = RegIndex(mo.getReg());
283 if (rx < 0) continue;
284 Kill(rx);
285 Force(rx, domain);
286 }
287}
288
289// A soft instruction can be changed to work in other domains given by mask.
290void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
291 // Scan the explicit use operands for incoming domains.
292 unsigned collmask = mask;
293 SmallVector<int, 4> used;
294 for (unsigned i = mi->getDesc().getNumDefs(),
295 e = mi->getDesc().getNumOperands(); i != e; ++i) {
296 MachineOperand &mo = mi->getOperand(i);
297 if (!mo.isReg()) continue;
298 int rx = RegIndex(mo.getReg());
299 if (rx < 0) continue;
300 if (DomainValue *dv = LiveRegs[rx]) {
301 // Is it possible to use this collapsed register for free?
302 if (dv->collapsed()) {
303 if (unsigned m = collmask & dv->Mask)
304 collmask = m;
305 } else if (dv->compat(collmask))
306 used.push_back(rx);
307 else
308 Kill(rx);
309 }
310 }
311
312 // If the collapsed operands force a single domain, propagate the collapse.
313 if (isPowerOf2_32(collmask)) {
314 unsigned domain = CountTrailingZeros_32(collmask);
315 TII->SetSSEDomain(mi, domain);
316 visitHardInstr(mi, domain);
317 return;
318 }
319
320 // Kill off any remaining uses that don't match collmask, and build a list of
321 // incoming DomainValue that we want to merge.
322 SmallVector<DomainValue*,4> doms;
323 for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
324 int rx = *i;
325 DomainValue *dv = LiveRegs[rx];
326 // This useless DomainValue could have been missed above
327 if (!dv->compat(collmask)) {
328 Kill(*i);
329 continue;
330 }
331 // sorted, uniqued insert.
332 bool inserted = false;
333 for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
334 i != e && !inserted; ++i) {
335 if (dv == *i)
336 inserted = true;
337 else if (dv->Dist < (*i)->Dist) {
338 inserted = true;
339 doms.insert(i, dv);
340 }
341 }
342 if (!inserted)
343 doms.push_back(dv);
344 }
345
346 // doms are now sorted in order of appearance. Try to merge them all, giving
347 // priority to the latest ones.
348 DomainValue *dv = 0;
349 while (!doms.empty()) {
350 if (!dv)
351 dv = doms.back();
352 else if (!Merge(dv, doms.back()))
353 for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i!=e; ++i)
354 if (LiveRegs[*i] == doms.back())
355 Kill(*i);
356 doms.pop_back();
357 }
358
359 // dv is the DomainValue we are going to use for this instruction.
360 if (!dv)
361 dv = Pool.Alloc();
362 dv->Mask = collmask;
363 dv->Instrs.push_back(mi);
364
365 // Finally set all defs and non-collapsed uses to dv.
366 for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
367 MachineOperand &mo = mi->getOperand(i);
368 if (!mo.isReg()) continue;
369 int rx = RegIndex(mo.getReg());
370 if (rx < 0) continue;
371 if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
372 Kill(rx);
373 SetLiveReg(rx, dv);
374 }
375 }
376}
377
378void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
379 // Process explicit defs, kill any XMM registers redefined
380 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
381 MachineOperand &mo = mi->getOperand(i);
382 if (!mo.isReg()) continue;
383 int rx = RegIndex(mo.getReg());
384 if (rx < 0) continue;
385 Kill(rx);
386 }
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000387}
388
389bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
390 MF = &mf;
391 TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000392 TRI = MF->getTarget().getRegisterInfo();
393 MBB = 0;
394
395 hasLiveRegs = false;
396 for (unsigned i=0, e = array_lengthof(LiveRegs); i != e; ++i)
397 LiveRegs[i] = 0;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000398
399 // If no XMM registers are used in the function, we can skip it completely.
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000400 bool anyregs = false;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000401 for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
402 E = X86::VR128RegClass.end(); I != E; ++I)
403 if (MF->getRegInfo().isPhysRegUsed(*I)) {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000404 anyregs = true;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000405 break;
406 }
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000407 if (!anyregs) return false;
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000408
409 MachineBasicBlock *Entry = MF->begin();
410 SmallPtrSet<MachineBasicBlock*, 16> Visited;
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000411 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000412 DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000413 DFI != DFE; ++DFI) {
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000414 enterBasicBlock(*DFI);
415 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
416 ++I) {
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000417 MachineInstr *mi = I;
418 if (mi->isDebugValue()) continue;
419 std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
420 if (domp.first)
421 if (domp.second)
422 visitSoftInstr(mi, domp.second);
423 else
424 visitHardInstr(mi, domp.first);
425 else if (hasLiveRegs)
426 visitGenericInstr(mi);
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000427 }
428 }
Jakob Stoklund Olesene4b94b42010-03-29 23:24:21 +0000429
430 Pool.Clear();
431
Jakob Stoklund Olesen352aa502010-03-25 17:25:00 +0000432 return false;
433}
434
435FunctionPass *llvm::createSSEDomainFixPass() {
436 return new SSEDomainFixPass();
437}