blob: ae2e05f1d0a8dffa6072fbcff96754ff817fde45 [file] [log] [blame]
Chris Lattner27e98aa2006-04-17 00:30:41 +00001//===-- BuildShuffleTable.cpp - Perfect Shuffle Generator -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file computes an optimal sequence of instructions for doing all shuffles
11// of two 4-element vectors. With a release build and when configured to emit
12// an altivec instruction table, this takes about 30s to run on a 2.7Ghz
13// PowerPC G5.
14//
15//===----------------------------------------------------------------------===//
16
17#include <iostream>
18#include <vector>
19
20struct Operator;
21
22// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
23// an element from that value of the input vectors. A value of 8 means the
24// entry is undefined.
25
26// Mask manipulation functions.
27static inline unsigned short MakeMask(unsigned V0, unsigned V1,
28 unsigned V2, unsigned V3) {
29 return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
30}
31
32/// getMaskElt - Return element N of the specified mask.
33static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
34 return (Mask >> ((3-Elt)*4)) & 0xF;
35}
36
37static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
38 unsigned FieldShift = ((3-Elt)*4);
39 return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift);
40}
41
42// Reject elements where the values are 9-15.
43static bool isValidMask(unsigned short Mask) {
44 unsigned short UndefBits = Mask & 0x8888;
45 return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0;
46}
47
48/// hasUndefElements - Return true if any of the elements in the mask are undefs
49///
50static bool hasUndefElements(unsigned short Mask) {
51 return (Mask & 0x8888) != 0;
52}
53
54/// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
55/// including undef values..
56static bool isOnlyLHSMask(unsigned short Mask) {
57 return (Mask & 0x4444) == 0;
58}
59
60/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
61/// refer to the LHS only (for when one argument value is passed into the same
62/// function twice).
63static unsigned short getLHSOnlyMask(unsigned short Mask) {
64 return Mask & 0xBBBB; // Keep only LHS and Undefs.
65}
66
67/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
68/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
69static unsigned getCompressedMask(unsigned short Mask) {
70 return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
71 getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3);
72}
73
74static void PrintMask(unsigned i, std::ostream &OS) {
75 OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0)))
76 << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1)))
77 << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2)))
78 << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3)))
79 << ">";
80}
81
82/// ShuffleVal - This represents a shufflevector operation.
83struct ShuffleVal {
84 unsigned Cost; // Number of instrs used to generate this value.
85 Operator *Op; // The Operation used to generate this value.
86 unsigned short Arg0, Arg1; // Input operands for this value.
87
88 ShuffleVal() : Cost(1000000) {}
89};
90
91
92/// ShufTab - This is the actual shuffle table that we are trying to generate.
93///
94static ShuffleVal ShufTab[65536];
95
96/// TheOperators - All of the operators that this target supports.
97static std::vector<Operator*> TheOperators;
98
99/// Operator - This is a vector operation that is available for use.
100struct Operator {
101 unsigned short ShuffleMask;
102 unsigned short OpNum;
103 const char *Name;
104
105 Operator(unsigned short shufflemask, const char *name)
106 : ShuffleMask(shufflemask), Name(name) {
107 OpNum = TheOperators.size();
108 TheOperators.push_back(this);
109 }
110 ~Operator() {
111 assert(TheOperators.back() == this);
112 TheOperators.pop_back();
113 }
114
115 bool isOnlyLHSOperator() const {
116 return isOnlyLHSMask(ShuffleMask);
117 }
118
119 const char *getName() const { return Name; }
120
121 unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
122 // Extract the elements from LHSMask and RHSMask, as appropriate.
123 unsigned Result = 0;
124 for (unsigned i = 0; i != 4; ++i) {
125 unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
126 unsigned ResElt;
127 if (SrcElt < 4)
128 ResElt = getMaskElt(LHSMask, SrcElt);
129 else if (SrcElt < 8)
130 ResElt = getMaskElt(RHSMask, SrcElt-4);
131 else {
132 assert(SrcElt == 8 && "Bad src elt!");
133 ResElt = 8;
134 }
135 Result |= ResElt << (4*i);
136 }
137 return Result;
138 }
139};
140
141static const char *getZeroCostOpName(unsigned short Op) {
142 if (ShufTab[Op].Arg0 == 0x0123)
143 return "LHS";
144 else if (ShufTab[Op].Arg0 == 0x4567)
145 return "RHS";
146 else {
147 assert(0 && "bad zero cost operation");
148 abort();
149 }
150}
151
152static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
153 unsigned short ThisOp = Vals[ValNo];
154 std::cerr << "t" << ValNo;
155 PrintMask(ThisOp, std::cerr);
156 std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
157
158 if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
159 std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
160 PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
161 } else {
162 // Figure out what tmp # it is.
163 for (unsigned i = 0; ; ++i)
164 if (Vals[i] == ShufTab[ThisOp].Arg0) {
165 std::cerr << "t" << i;
166 break;
167 }
168 }
169
170 if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
171 std::cerr << ", ";
172 if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
173 std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
174 PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
175 } else {
176 // Figure out what tmp # it is.
177 for (unsigned i = 0; ; ++i)
178 if (Vals[i] == ShufTab[ThisOp].Arg1) {
179 std::cerr << "t" << i;
180 break;
181 }
182 }
183 }
184 std::cerr << ") ";
185}
186
187static unsigned getNumEntered() {
188 unsigned Count = 0;
189 for (unsigned i = 0; i != 65536; ++i)
190 Count += ShufTab[i].Cost < 100;
191 return Count;
192}
193
194static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
195 unsigned &NumVals) {
196 if (ShufTab[Elt].Cost == 0) return;
197
198 // If this value has already been evaluated, it is free. FIXME: match undefs.
199 for (unsigned i = 0, e = NumVals; i != e; ++i)
200 if (Vals[i] == Elt) return;
201
202 // Otherwise, get the operands of the value, then add it.
203 unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
204 if (ShufTab[Arg0].Cost)
205 EvaluateOps(Arg0, Vals, NumVals);
206 if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
207 EvaluateOps(Arg1, Vals, NumVals);
208
209 Vals[NumVals++] = Elt;
210}
211
212
213int main() {
214 // Seed the table with accesses to the LHS and RHS.
215 ShufTab[0x0123].Cost = 0;
216 ShufTab[0x0123].Op = 0;
217 ShufTab[0x0123].Arg0 = 0x0123;
218 ShufTab[0x4567].Cost = 0;
219 ShufTab[0x4567].Op = 0;
220 ShufTab[0x4567].Arg0 = 0x4567;
221
222 // Seed the first-level of shuffles, shuffles whose inputs are the input to
223 // the vectorshuffle operation.
224 bool MadeChange = true;
225 unsigned OpCount = 0;
226 while (MadeChange) {
227 MadeChange = false;
228 ++OpCount;
229 std::cerr << "Starting iteration #" << OpCount << " with "
230 << getNumEntered() << " entries established.\n";
231
232 // Scan the table for two reasons: First, compute the maximum cost of any
233 // operation left in the table. Second, make sure that values with undefs
234 // have the cheapest alternative that they match.
235 unsigned MaxCost = ShufTab[0].Cost;
236 for (unsigned i = 1; i != 0x8889; ++i) {
237 if (!isValidMask(i)) continue;
238 if (ShufTab[i].Cost > MaxCost)
239 MaxCost = ShufTab[i].Cost;
240
241 // If this value has an undef, make it be computed the cheapest possible
242 // way of any of the things that it matches.
243 if (hasUndefElements(i)) {
244 // This code is a little bit tricky, so here's the idea: consider some
245 // permutation, like 7u4u. To compute the lowest cost for 7u4u, we
246 // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If
247 // there are 3 undefs, the number rises to 729 entries we have to scan,
248 // and for the 4 undef case, we have to scan the whole table.
249 //
250 // Instead of doing this huge amount of scanning, we process the table
251 // entries *in order*, and use the fact that 'u' is 8, larger than any
252 // valid index. Given an entry like 7u4u then, we only need to scan
253 // 7[0-7]4u - 8 entries. We can get away with this, because we already
254 // know that each of 704u, 714u, 724u, etc contain the minimum value of
255 // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
256 unsigned UndefIdx;
257 if (i & 0x8000)
258 UndefIdx = 0;
259 else if (i & 0x0800)
260 UndefIdx = 1;
261 else if (i & 0x0080)
262 UndefIdx = 2;
263 else if (i & 0x0008)
264 UndefIdx = 3;
265 else
266 abort();
267
268 unsigned MinVal = i;
269 unsigned MinCost = ShufTab[i].Cost;
270
271 // Scan the 8 entries.
272 for (unsigned j = 0; j != 8; ++j) {
273 unsigned NewElt = setMaskElt(i, UndefIdx, j);
274 if (ShufTab[NewElt].Cost < MinCost) {
275 MinCost = ShufTab[NewElt].Cost;
276 MinVal = NewElt;
277 }
278 }
279
280 // If we found something cheaper than what was here before, use it.
281 if (i != MinVal) {
282 MadeChange = true;
283 ShufTab[i] = ShufTab[MinVal];
284 }
285 }
286 }
287
288 for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
289 if (!isValidMask(LHS)) continue;
290 if (ShufTab[LHS].Cost > 1000) continue;
291
292 // If nothing involving this operand could possibly be cheaper than what
293 // we already have, don't consider it.
294 if (ShufTab[LHS].Cost + 1 >= MaxCost)
295 continue;
296
297 for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
298 Operator *Op = TheOperators[opnum];
299 unsigned short Mask = Op->ShuffleMask;
300
301 // Evaluate op(LHS,LHS)
302 unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
303
304 unsigned Cost = ShufTab[LHS].Cost + 1;
305 if (Cost < ShufTab[ResultMask].Cost) {
306 ShufTab[ResultMask].Cost = Cost;
307 ShufTab[ResultMask].Op = Op;
308 ShufTab[ResultMask].Arg0 = LHS;
309 ShufTab[ResultMask].Arg1 = LHS;
310 MadeChange = true;
311 }
312
313 // If this is a two input instruction, include the op(x,y) cases. If
314 // this is a one input instruction, skip this.
315 if (Op->isOnlyLHSOperator()) continue;
316
317 for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
318 if (!isValidMask(RHS)) continue;
319 if (ShufTab[RHS].Cost > 1000) continue;
320
321 // If nothing involving this operand could possibly be cheaper than
322 // what we already have, don't consider it.
323 if (ShufTab[RHS].Cost + 1 >= MaxCost)
324 continue;
325
326
327 // Evaluate op(LHS,RHS)
328 unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
329
330 if (ShufTab[ResultMask].Cost <= OpCount ||
331 ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
332 ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
333 continue;
334
335 // Figure out the cost to evaluate this, knowing that CSE's only need
336 // to be evaluated once.
337 unsigned short Vals[30];
338 unsigned NumVals = 0;
339 EvaluateOps(LHS, Vals, NumVals);
340 EvaluateOps(RHS, Vals, NumVals);
341
342 unsigned Cost = NumVals + 1;
343 if (Cost < ShufTab[ResultMask].Cost) {
344 ShufTab[ResultMask].Cost = Cost;
345 ShufTab[ResultMask].Op = Op;
346 ShufTab[ResultMask].Arg0 = LHS;
347 ShufTab[ResultMask].Arg1 = RHS;
348 MadeChange = true;
349 }
350 }
351 }
352 }
353 }
354
355 std::cerr << "Finished Table has " << getNumEntered()
356 << " entries established.\n";
357
358 unsigned CostArray[10] = { 0 };
359
360 // Compute a cost histogram.
361 for (unsigned i = 0; i != 65536; ++i) {
362 if (!isValidMask(i)) continue;
363 if (ShufTab[i].Cost > 9)
364 ++CostArray[9];
365 else
366 ++CostArray[ShufTab[i].Cost];
367 }
368
369 for (unsigned i = 0; i != 9; ++i)
370 if (CostArray[i])
371 std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
372 if (CostArray[9])
373 std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
374
375
376 // Build up the table to emit.
377 std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
378 std::cout << "static const unsigned InstrTab[6561+1] = {\n";
379
380 for (unsigned i = 0; i != 0x8889; ++i) {
381 if (!isValidMask(i)) continue;
382
383 // CostSat - The cost of this operation saturated to two bits.
384 unsigned CostSat = ShufTab[i].Cost;
385 if (CostSat > 3) CostSat = 3;
386
387 unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
388 assert(OpNum < 16 && "Too few bits to encode operation!");
389
390 unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
391 unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
392
393 // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
394 // LHS, and 13 bits of RHS = 32 bits.
395 unsigned Val = (CostSat << 30) | (OpNum << 27) | (LHS << 13) | RHS;
396
397 std::cout << " " << Val << "U,\t// ";
398 PrintMask(i, std::cout);
399 std::cout << ": Cost " << ShufTab[i].Cost;
400 std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
401 std::cout << " ";
402 if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
403 std::cout << getZeroCostOpName(ShufTab[i].Arg0);
404 } else {
405 PrintMask(ShufTab[i].Arg0, std::cout);
406 }
407
408 if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
409 std::cout << ", ";
410 if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
411 std::cout << getZeroCostOpName(ShufTab[i].Arg1);
412 } else {
413 PrintMask(ShufTab[i].Arg1, std::cout);
414 }
415 }
416 std::cout << "\n";
417 }
418 std::cout << " 0\n};\n";
419
420 if (0) {
421 // Print out the table.
422 for (unsigned i = 0; i != 0x8889; ++i) {
423 if (!isValidMask(i)) continue;
424 if (ShufTab[i].Cost < 1000) {
425 PrintMask(i, std::cerr);
426 std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
427
428 unsigned short Vals[30];
429 unsigned NumVals = 0;
430 EvaluateOps(i, Vals, NumVals);
431
432 for (unsigned j = 0, e = NumVals; j != e; ++j)
433 PrintOperation(j, Vals);
434 std::cerr << "\n";
435 }
436 }
437 }
438}
439
440
441
442///===---------------------------------------------------------------------===//
443/// The altivec instruction definitions. This is the altivec-specific part of
444/// this file.
445///===---------------------------------------------------------------------===//
446
447struct vmrghw : public Operator {
448 vmrghw() : Operator(0x0415, "vmrghw") {}
449} the_vmrghw;
450
451struct vmrglw : public Operator {
452 vmrglw() : Operator(0x2637, "vmrglw") {}
453} the_vmrglw;
454
455template<unsigned Elt>
456struct vspltisw : public Operator {
457 vspltisw(const char *N) : Operator(MakeMask(Elt, Elt, Elt, Elt), N) {}
458};
459
460vspltisw<0> the_vspltisw0("vspltisw0");
461vspltisw<1> the_vspltisw1("vspltisw1");
462vspltisw<2> the_vspltisw2("vspltisw2");
463vspltisw<3> the_vspltisw3("vspltisw3");
464
465template<unsigned N>
466struct vsldoi : public Operator {
467 vsldoi(const char *n) : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), n){
468 }
469};
470
471vsldoi<1> the_vsldoi1("vsldoi4");
472vsldoi<2> the_vsldoi2("vsldoi8");
473vsldoi<3> the_vsldoi3("vsldoi12");
474