Chris Lattner | 4ad53bd | 2006-04-17 00:35:34 +0000 | [diff] [blame] | 1 | //===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===// |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
Chris Lattner | 3060910 | 2007-12-29 20:37:13 +0000 | [diff] [blame] | 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file computes an optimal sequence of instructions for doing all shuffles |
| 11 | // of two 4-element vectors. With a release build and when configured to emit |
| 12 | // an altivec instruction table, this takes about 30s to run on a 2.7Ghz |
| 13 | // PowerPC G5. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include <iostream> |
Jim Grosbach | 41045ba | 2010-10-14 00:12:49 +0000 | [diff] [blame] | 18 | #include <iomanip> |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 19 | #include <vector> |
Chris Lattner | 309db81 | 2006-04-18 00:21:25 +0000 | [diff] [blame] | 20 | #include <cassert> |
Anton Korobeynikov | ae9f3a3 | 2008-02-20 11:08:44 +0000 | [diff] [blame] | 21 | #include <cstdlib> |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 22 | struct Operator; |
| 23 | |
// Masks are 4-nibble hex numbers, one nibble per result element. A value of
// 0-7 in a nibble means that the element is taken from that position of the
// two input vectors (0-3 select from the LHS, 4-7 from the RHS). A value of 8
// means the entry is undefined.
| 27 | |
| 28 | // Mask manipulation functions. |
/// MakeMask - Pack four element selectors into one mask, one nibble each.
/// V0 ends up in the most significant nibble and V3 in the least significant
/// one; the result is truncated to 16 bits.
static inline unsigned short MakeMask(unsigned V0, unsigned V1,
                                      unsigned V2, unsigned V3) {
  // Shift the selectors in one nibble at a time, most significant first.
  unsigned Packed = V0;
  Packed = (Packed << 4) | V1;
  Packed = (Packed << 4) | V2;
  Packed = (Packed << 4) | V3;
  return Packed;
}
| 33 | |
/// getMaskElt - Return element number Elt (0-3) of the specified mask.
/// Element 0 is stored in the most significant nibble of the 16-bit mask and
/// element 3 in the least significant one.
static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
  const unsigned Shift = 4 * (3 - Elt);
  return (Mask >> Shift) & 0xF;
}
| 38 | |
/// setMaskElt - Return a copy of Mask in which element number Elt (0-3, with
/// element 0 in the most significant nibble) has been replaced by NewVal.
static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
  const unsigned Shift = 4 * (3 - Elt);
  // Clear the old nibble, then drop the new value into the hole.
  const unsigned Cleared = Mask & ~(0xF << Shift);
  return Cleared | (NewVal << Shift);
}
| 43 | |
/// isValidMask - Return true if every element of the mask is in the range
/// 0-8, i.e. reject masks that contain an element with a value of 9-15.
static bool isValidMask(unsigned short Mask) {
  // Bit 3 of a nibble is set for the values 8-15. Such a nibble is only
  // acceptable if its three low bits are clear (exactly 8), so smear each
  // high bit over the three bits below it and test the mask against that.
  const unsigned HighBits = Mask & 0x8888;
  const unsigned MustBeClear =
    (HighBits >> 1) | (HighBits >> 2) | (HighBits >> 3);
  return (Mask & MustBeClear) == 0;
}
| 49 | |
/// hasUndefElements - Return true if at least one element of the mask has its
/// high bit set. For a valid mask that means the element is undef (8).
static bool hasUndefElements(unsigned short Mask) {
  const unsigned HighBits = Mask & 0x8888;
  return HighBits != 0;
}
| 55 | |
/// isOnlyLHSMask - Return true if no element of this mask selects from the
/// RHS (values 4-7, which all have bit 2 set). Undef elements (8) do not
/// count as RHS references.
static bool isOnlyLHSMask(unsigned short Mask) {
  const unsigned RHSBits = Mask & 0x4444;
  return !RHSBits;
}
| 61 | |
/// getLHSOnlyMask - Rewrite a mask that refers to both its LHS and its RHS so
/// that it refers to the LHS only (for when the same argument value is passed
/// into a function twice). Currently compiled out.
#if 0
static unsigned short getLHSOnlyMask(unsigned short Mask) {
  // Clearing bit 2 of every nibble folds 4-7 onto 0-3 while leaving the LHS
  // selectors 0-3 and undef (8) untouched.
  const unsigned short KeepLHSAndUndef = 0xBBBB;
  return Mask & KeepLHSAndUndef;
}
#endif
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 70 | |
| 71 | /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4 |
| 72 | /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9. |
| 73 | static unsigned getCompressedMask(unsigned short Mask) { |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 74 | return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 75 | getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3); |
| 76 | } |
| 77 | |
| 78 | static void PrintMask(unsigned i, std::ostream &OS) { |
| 79 | OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0))) |
| 80 | << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1))) |
| 81 | << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2))) |
| 82 | << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3))) |
| 83 | << ">"; |
| 84 | } |
| 85 | |
/// ShuffleVal - This represents a shufflevector operation: the cheapest way
/// found so far to produce one particular result mask.
struct ShuffleVal {
  unsigned Cost;             // Number of instrs used to generate this value.
  struct Operator *Op;       // The Operation used to generate this value
                             // (null when none has been recorded).
  unsigned short Arg0, Arg1; // Input operands for this value.

  // A cost of 1000000 marks an entry for which no sequence is known yet.
  // Every other member is initialized too, so that an entry never holds
  // indeterminate values regardless of its storage duration; the seed entries
  // set up in main(), for instance, never assign Arg1.
  ShuffleVal() : Cost(1000000), Op(0), Arg0(0), Arg1(0) {}
};
| 94 | |
| 95 | |
| 96 | /// ShufTab - This is the actual shuffle table that we are trying to generate. |
| 97 | /// |
| 98 | static ShuffleVal ShufTab[65536]; |
| 99 | |
| 100 | /// TheOperators - All of the operators that this target supports. |
| 101 | static std::vector<Operator*> TheOperators; |
| 102 | |
| 103 | /// Operator - This is a vector operation that is available for use. |
| 104 | struct Operator { |
| 105 | unsigned short ShuffleMask; |
| 106 | unsigned short OpNum; |
| 107 | const char *Name; |
Anton Korobeynikov | c9d44f0 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 108 | unsigned Cost; |
| 109 | |
| 110 | Operator(unsigned short shufflemask, const char *name, unsigned opnum, |
| 111 | unsigned cost = 1) |
| 112 | : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) { |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 113 | TheOperators.push_back(this); |
| 114 | } |
| 115 | ~Operator() { |
| 116 | assert(TheOperators.back() == this); |
| 117 | TheOperators.pop_back(); |
| 118 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 119 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 120 | bool isOnlyLHSOperator() const { |
| 121 | return isOnlyLHSMask(ShuffleMask); |
| 122 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 123 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 124 | const char *getName() const { return Name; } |
Anton Korobeynikov | c9d44f0 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 125 | unsigned getCost() const { return Cost; } |
| 126 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 127 | unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) { |
| 128 | // Extract the elements from LHSMask and RHSMask, as appropriate. |
| 129 | unsigned Result = 0; |
| 130 | for (unsigned i = 0; i != 4; ++i) { |
| 131 | unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF; |
| 132 | unsigned ResElt; |
| 133 | if (SrcElt < 4) |
| 134 | ResElt = getMaskElt(LHSMask, SrcElt); |
| 135 | else if (SrcElt < 8) |
| 136 | ResElt = getMaskElt(RHSMask, SrcElt-4); |
| 137 | else { |
| 138 | assert(SrcElt == 8 && "Bad src elt!"); |
| 139 | ResElt = 8; |
| 140 | } |
| 141 | Result |= ResElt << (4*i); |
| 142 | } |
| 143 | return Result; |
| 144 | } |
| 145 | }; |
| 146 | |
| 147 | static const char *getZeroCostOpName(unsigned short Op) { |
| 148 | if (ShufTab[Op].Arg0 == 0x0123) |
| 149 | return "LHS"; |
| 150 | else if (ShufTab[Op].Arg0 == 0x4567) |
| 151 | return "RHS"; |
| 152 | else { |
| 153 | assert(0 && "bad zero cost operation"); |
| 154 | abort(); |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | static void PrintOperation(unsigned ValNo, unsigned short Vals[]) { |
| 159 | unsigned short ThisOp = Vals[ValNo]; |
| 160 | std::cerr << "t" << ValNo; |
| 161 | PrintMask(ThisOp, std::cerr); |
| 162 | std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "("; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 163 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 164 | if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) { |
| 165 | std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0); |
| 166 | PrintMask(ShufTab[ThisOp].Arg0, std::cerr); |
| 167 | } else { |
| 168 | // Figure out what tmp # it is. |
| 169 | for (unsigned i = 0; ; ++i) |
| 170 | if (Vals[i] == ShufTab[ThisOp].Arg0) { |
| 171 | std::cerr << "t" << i; |
| 172 | break; |
| 173 | } |
| 174 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 175 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 176 | if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) { |
| 177 | std::cerr << ", "; |
| 178 | if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) { |
| 179 | std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1); |
| 180 | PrintMask(ShufTab[ThisOp].Arg1, std::cerr); |
| 181 | } else { |
| 182 | // Figure out what tmp # it is. |
| 183 | for (unsigned i = 0; ; ++i) |
| 184 | if (Vals[i] == ShufTab[ThisOp].Arg1) { |
| 185 | std::cerr << "t" << i; |
| 186 | break; |
| 187 | } |
| 188 | } |
| 189 | } |
| 190 | std::cerr << ") "; |
| 191 | } |
| 192 | |
| 193 | static unsigned getNumEntered() { |
| 194 | unsigned Count = 0; |
| 195 | for (unsigned i = 0; i != 65536; ++i) |
| 196 | Count += ShufTab[i].Cost < 100; |
| 197 | return Count; |
| 198 | } |
| 199 | |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 200 | static void EvaluateOps(unsigned short Elt, unsigned short Vals[], |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 201 | unsigned &NumVals) { |
| 202 | if (ShufTab[Elt].Cost == 0) return; |
| 203 | |
| 204 | // If this value has already been evaluated, it is free. FIXME: match undefs. |
| 205 | for (unsigned i = 0, e = NumVals; i != e; ++i) |
| 206 | if (Vals[i] == Elt) return; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 207 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 208 | // Otherwise, get the operands of the value, then add it. |
| 209 | unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1; |
| 210 | if (ShufTab[Arg0].Cost) |
| 211 | EvaluateOps(Arg0, Vals, NumVals); |
| 212 | if (Arg0 != Arg1 && ShufTab[Arg1].Cost) |
| 213 | EvaluateOps(Arg1, Vals, NumVals); |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 214 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 215 | Vals[NumVals++] = Elt; |
| 216 | } |
| 217 | |
| 218 | |
| 219 | int main() { |
| 220 | // Seed the table with accesses to the LHS and RHS. |
| 221 | ShufTab[0x0123].Cost = 0; |
| 222 | ShufTab[0x0123].Op = 0; |
| 223 | ShufTab[0x0123].Arg0 = 0x0123; |
| 224 | ShufTab[0x4567].Cost = 0; |
| 225 | ShufTab[0x4567].Op = 0; |
| 226 | ShufTab[0x4567].Arg0 = 0x4567; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 227 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 228 | // Seed the first-level of shuffles, shuffles whose inputs are the input to |
| 229 | // the vectorshuffle operation. |
| 230 | bool MadeChange = true; |
| 231 | unsigned OpCount = 0; |
| 232 | while (MadeChange) { |
| 233 | MadeChange = false; |
| 234 | ++OpCount; |
| 235 | std::cerr << "Starting iteration #" << OpCount << " with " |
| 236 | << getNumEntered() << " entries established.\n"; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 237 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 238 | // Scan the table for two reasons: First, compute the maximum cost of any |
| 239 | // operation left in the table. Second, make sure that values with undefs |
| 240 | // have the cheapest alternative that they match. |
| 241 | unsigned MaxCost = ShufTab[0].Cost; |
| 242 | for (unsigned i = 1; i != 0x8889; ++i) { |
| 243 | if (!isValidMask(i)) continue; |
| 244 | if (ShufTab[i].Cost > MaxCost) |
| 245 | MaxCost = ShufTab[i].Cost; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 246 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 247 | // If this value has an undef, make it be computed the cheapest possible |
| 248 | // way of any of the things that it matches. |
| 249 | if (hasUndefElements(i)) { |
| 250 | // This code is a little bit tricky, so here's the idea: consider some |
| 251 | // permutation, like 7u4u. To compute the lowest cost for 7u4u, we |
| 252 | // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If |
| 253 | // there are 3 undefs, the number rises to 729 entries we have to scan, |
| 254 | // and for the 4 undef case, we have to scan the whole table. |
| 255 | // |
| 256 | // Instead of doing this huge amount of scanning, we process the table |
| 257 | // entries *in order*, and use the fact that 'u' is 8, larger than any |
| 258 | // valid index. Given an entry like 7u4u then, we only need to scan |
| 259 | // 7[0-7]4u - 8 entries. We can get away with this, because we already |
| 260 | // know that each of 704u, 714u, 724u, etc contain the minimum value of |
| 261 | // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively. |
| 262 | unsigned UndefIdx; |
| 263 | if (i & 0x8000) |
| 264 | UndefIdx = 0; |
| 265 | else if (i & 0x0800) |
| 266 | UndefIdx = 1; |
| 267 | else if (i & 0x0080) |
| 268 | UndefIdx = 2; |
| 269 | else if (i & 0x0008) |
| 270 | UndefIdx = 3; |
| 271 | else |
| 272 | abort(); |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 273 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 274 | unsigned MinVal = i; |
| 275 | unsigned MinCost = ShufTab[i].Cost; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 276 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 277 | // Scan the 8 entries. |
| 278 | for (unsigned j = 0; j != 8; ++j) { |
| 279 | unsigned NewElt = setMaskElt(i, UndefIdx, j); |
| 280 | if (ShufTab[NewElt].Cost < MinCost) { |
| 281 | MinCost = ShufTab[NewElt].Cost; |
| 282 | MinVal = NewElt; |
| 283 | } |
| 284 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 285 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 286 | // If we found something cheaper than what was here before, use it. |
| 287 | if (i != MinVal) { |
| 288 | MadeChange = true; |
| 289 | ShufTab[i] = ShufTab[MinVal]; |
| 290 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 291 | } |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 292 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 293 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 294 | for (unsigned LHS = 0; LHS != 0x8889; ++LHS) { |
| 295 | if (!isValidMask(LHS)) continue; |
| 296 | if (ShufTab[LHS].Cost > 1000) continue; |
| 297 | |
| 298 | // If nothing involving this operand could possibly be cheaper than what |
| 299 | // we already have, don't consider it. |
| 300 | if (ShufTab[LHS].Cost + 1 >= MaxCost) |
| 301 | continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 302 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 303 | for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) { |
| 304 | Operator *Op = TheOperators[opnum]; |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 305 | |
| 306 | // Evaluate op(LHS,LHS) |
| 307 | unsigned ResultMask = Op->getTransformedMask(LHS, LHS); |
| 308 | |
Anton Korobeynikov | c9d44f0 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 309 | unsigned Cost = ShufTab[LHS].Cost + Op->getCost(); |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 310 | if (Cost < ShufTab[ResultMask].Cost) { |
| 311 | ShufTab[ResultMask].Cost = Cost; |
| 312 | ShufTab[ResultMask].Op = Op; |
| 313 | ShufTab[ResultMask].Arg0 = LHS; |
| 314 | ShufTab[ResultMask].Arg1 = LHS; |
| 315 | MadeChange = true; |
| 316 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 317 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 318 | // If this is a two input instruction, include the op(x,y) cases. If |
| 319 | // this is a one input instruction, skip this. |
| 320 | if (Op->isOnlyLHSOperator()) continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 321 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 322 | for (unsigned RHS = 0; RHS != 0x8889; ++RHS) { |
| 323 | if (!isValidMask(RHS)) continue; |
| 324 | if (ShufTab[RHS].Cost > 1000) continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 325 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 326 | // If nothing involving this operand could possibly be cheaper than |
| 327 | // what we already have, don't consider it. |
| 328 | if (ShufTab[RHS].Cost + 1 >= MaxCost) |
| 329 | continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 330 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 331 | |
| 332 | // Evaluate op(LHS,RHS) |
| 333 | unsigned ResultMask = Op->getTransformedMask(LHS, RHS); |
| 334 | |
| 335 | if (ShufTab[ResultMask].Cost <= OpCount || |
| 336 | ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost || |
| 337 | ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost) |
| 338 | continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 339 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 340 | // Figure out the cost to evaluate this, knowing that CSE's only need |
| 341 | // to be evaluated once. |
| 342 | unsigned short Vals[30]; |
| 343 | unsigned NumVals = 0; |
| 344 | EvaluateOps(LHS, Vals, NumVals); |
| 345 | EvaluateOps(RHS, Vals, NumVals); |
| 346 | |
Anton Korobeynikov | c9d44f0 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 347 | unsigned Cost = NumVals + Op->getCost(); |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 348 | if (Cost < ShufTab[ResultMask].Cost) { |
| 349 | ShufTab[ResultMask].Cost = Cost; |
| 350 | ShufTab[ResultMask].Op = Op; |
| 351 | ShufTab[ResultMask].Arg0 = LHS; |
| 352 | ShufTab[ResultMask].Arg1 = RHS; |
| 353 | MadeChange = true; |
| 354 | } |
| 355 | } |
| 356 | } |
| 357 | } |
| 358 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 359 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 360 | std::cerr << "Finished Table has " << getNumEntered() |
| 361 | << " entries established.\n"; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 362 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 363 | unsigned CostArray[10] = { 0 }; |
| 364 | |
| 365 | // Compute a cost histogram. |
| 366 | for (unsigned i = 0; i != 65536; ++i) { |
| 367 | if (!isValidMask(i)) continue; |
| 368 | if (ShufTab[i].Cost > 9) |
| 369 | ++CostArray[9]; |
| 370 | else |
| 371 | ++CostArray[ShufTab[i].Cost]; |
| 372 | } |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 373 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 374 | for (unsigned i = 0; i != 9; ++i) |
| 375 | if (CostArray[i]) |
| 376 | std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n"; |
| 377 | if (CostArray[9]) |
| 378 | std::cout << "// " << CostArray[9] << " entries have higher cost!\n"; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 379 | |
| 380 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 381 | // Build up the table to emit. |
| 382 | std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n"; |
Chris Lattner | 3033d4d | 2006-04-17 00:33:35 +0000 | [diff] [blame] | 383 | std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n"; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 384 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 385 | for (unsigned i = 0; i != 0x8889; ++i) { |
| 386 | if (!isValidMask(i)) continue; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 387 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 388 | // CostSat - The cost of this operation saturated to two bits. |
| 389 | unsigned CostSat = ShufTab[i].Cost; |
Chris Lattner | 8c3e847 | 2006-04-17 05:25:16 +0000 | [diff] [blame] | 390 | if (CostSat > 4) CostSat = 4; |
| 391 | if (CostSat == 0) CostSat = 1; |
| 392 | --CostSat; // Cost is now between 0-3. |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 393 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 394 | unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0; |
| 395 | assert(OpNum < 16 && "Too few bits to encode operation!"); |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 396 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 397 | unsigned LHS = getCompressedMask(ShufTab[i].Arg0); |
| 398 | unsigned RHS = getCompressedMask(ShufTab[i].Arg1); |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 399 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 400 | // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of |
| 401 | // LHS, and 13 bits of RHS = 32 bits. |
Chris Lattner | 195d8ad | 2006-04-17 05:05:52 +0000 | [diff] [blame] | 402 | unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS; |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 403 | |
Jim Grosbach | 41045ba | 2010-10-14 00:12:49 +0000 | [diff] [blame] | 404 | std::cout << " " << std::setw(10) << Val << "U, // "; |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 405 | PrintMask(i, std::cout); |
| 406 | std::cout << ": Cost " << ShufTab[i].Cost; |
| 407 | std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy"); |
| 408 | std::cout << " "; |
| 409 | if (ShufTab[ShufTab[i].Arg0].Cost == 0) { |
| 410 | std::cout << getZeroCostOpName(ShufTab[i].Arg0); |
| 411 | } else { |
| 412 | PrintMask(ShufTab[i].Arg0, std::cout); |
| 413 | } |
| 414 | |
| 415 | if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) { |
| 416 | std::cout << ", "; |
| 417 | if (ShufTab[ShufTab[i].Arg1].Cost == 0) { |
| 418 | std::cout << getZeroCostOpName(ShufTab[i].Arg1); |
| 419 | } else { |
| 420 | PrintMask(ShufTab[i].Arg1, std::cout); |
| 421 | } |
| 422 | } |
| 423 | std::cout << "\n"; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 424 | } |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 425 | std::cout << " 0\n};\n"; |
| 426 | |
| 427 | if (0) { |
| 428 | // Print out the table. |
| 429 | for (unsigned i = 0; i != 0x8889; ++i) { |
| 430 | if (!isValidMask(i)) continue; |
| 431 | if (ShufTab[i].Cost < 1000) { |
| 432 | PrintMask(i, std::cerr); |
| 433 | std::cerr << " - Cost " << ShufTab[i].Cost << " - "; |
Anton Korobeynikov | cb02dde | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 434 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 435 | unsigned short Vals[30]; |
| 436 | unsigned NumVals = 0; |
| 437 | EvaluateOps(i, Vals, NumVals); |
| 438 | |
| 439 | for (unsigned j = 0, e = NumVals; j != e; ++j) |
| 440 | PrintOperation(j, Vals); |
| 441 | std::cerr << "\n"; |
| 442 | } |
| 443 | } |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 448 | #ifdef GENERATE_ALTIVEC |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 449 | |
| 450 | ///===---------------------------------------------------------------------===// |
| 451 | /// The altivec instruction definitions. This is the altivec-specific part of |
| 452 | /// this file. |
| 453 | ///===---------------------------------------------------------------------===// |
| 454 | |
// Note that the opcode numbers here must match those in the PPC backend.
// They are spelled out explicitly so that an accidental reordering cannot
// silently renumber them. The table encoding leaves 4 bits for an opcode.
enum {
  OP_COPY      = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VMRGHW    = 1,
  OP_VMRGLW    = 2,
  OP_VSPLTISW0 = 3,
  OP_VSPLTISW1 = 4,
  OP_VSPLTISW2 = 5,
  OP_VSPLTISW3 = 6,
  OP_VSLDOI4   = 7,
  OP_VSLDOI8   = 8,
  OP_VSLDOI12  = 9
};
| 468 | |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 469 | struct vmrghw : public Operator { |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 470 | vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {} |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 471 | } the_vmrghw; |
| 472 | |
| 473 | struct vmrglw : public Operator { |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 474 | vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {} |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 475 | } the_vmrglw; |
| 476 | |
| 477 | template<unsigned Elt> |
| 478 | struct vspltisw : public Operator { |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 479 | vspltisw(const char *N, unsigned Opc) |
| 480 | : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 481 | }; |
| 482 | |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 483 | vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0); |
| 484 | vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1); |
| 485 | vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2); |
| 486 | vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3); |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 487 | |
| 488 | template<unsigned N> |
| 489 | struct vsldoi : public Operator { |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 490 | vsldoi(const char *Name, unsigned Opc) |
| 491 | : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 492 | } |
| 493 | }; |
| 494 | |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 495 | vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4); |
| 496 | vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8); |
| 497 | vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12); |
Chris Lattner | 27e98aa | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 498 | |
Chris Lattner | cf1f644 | 2006-04-17 00:47:18 +0000 | [diff] [blame] | 499 | #endif |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 500 | |
// Exactly one target's operator set may be compiled in: the target sections
// declare the same OP_COPY enumerator and register their operators in the
// same TheOperators list. NEON is the default, but it is only selected when
// no target was chosen on the command line, so that building with
// -DGENERATE_ALTIVEC yields the altivec table instead of a compile error.
#if defined(GENERATE_ALTIVEC) && defined(GENERATE_NEON)
#error "Define only one of GENERATE_ALTIVEC and GENERATE_NEON"
#endif
#if !defined(GENERATE_ALTIVEC) && !defined(GENERATE_NEON)
#define GENERATE_NEON
#endif
| 502 | |
| 503 | #ifdef GENERATE_NEON |
// Opcode numbers emitted into the generated table. The table encoding leaves
// 4 bits for an opcode, so every value has to stay below 16.
// NOTE(review): presumably these must match the numbering used by the
// consumer of the table, as the altivec ones must - confirm before changing.
enum {
  OP_COPY  = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VREV  = 1,
  OP_VDUP0 = 2,
  OP_VDUP1 = 3,
  OP_VDUP2 = 4,
  OP_VDUP3 = 5,
  OP_VEXT1 = 6,
  OP_VEXT2 = 7,
  OP_VEXT3 = 8,
  OP_VUZPL = 9,  // VUZP, left result
  OP_VUZPR = 10, // VUZP, right result
  OP_VZIPL = 11, // VZIP, left result
  OP_VZIPR = 12, // VZIP, right result
  OP_VTRNL = 13, // VTRN, left result
  OP_VTRNR = 14  // VTRN, right result
};
| 521 | |
| 522 | struct vrev : public Operator { |
| 523 | vrev() : Operator(0x1032, "vrev", OP_VREV) {} |
| 524 | } the_vrev; |
| 525 | |
| 526 | template<unsigned Elt> |
| 527 | struct vdup : public Operator { |
| 528 | vdup(const char *N, unsigned Opc) |
| 529 | : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} |
| 530 | }; |
| 531 | |
| 532 | vdup<0> the_vdup0("vdup0", OP_VDUP0); |
| 533 | vdup<1> the_vdup1("vdup1", OP_VDUP1); |
| 534 | vdup<2> the_vdup2("vdup2", OP_VDUP2); |
| 535 | vdup<3> the_vdup3("vdup3", OP_VDUP3); |
| 536 | |
| 537 | template<unsigned N> |
| 538 | struct vext : public Operator { |
| 539 | vext(const char *Name, unsigned Opc) |
| 540 | : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { |
| 541 | } |
| 542 | }; |
| 543 | |
| 544 | vext<1> the_vext1("vext1", OP_VEXT1); |
| 545 | vext<2> the_vext2("vext2", OP_VEXT2); |
| 546 | vext<3> the_vext3("vext3", OP_VEXT3); |
| 547 | |
| 548 | struct vuzpl : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 549 | vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 550 | } the_vuzpl; |
| 551 | |
| 552 | struct vuzpr : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 553 | vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 554 | } the_vuzpr; |
| 555 | |
| 556 | struct vzipl : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 557 | vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 558 | } the_vzipl; |
| 559 | |
| 560 | struct vzipr : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 561 | vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 562 | } the_vzipr; |
| 563 | |
| 564 | struct vtrnl : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 565 | vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 566 | } the_vtrnl; |
| 567 | |
| 568 | struct vtrnr : public Operator { |
Anton Korobeynikov | 58cd84d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 569 | vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {} |
Anton Korobeynikov | 1c8e581 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 570 | } the_vtrnr; |
| 571 | |
| 572 | #endif |