| Chris Lattner | ac66b4a | 2006-04-17 00:35:34 +0000 | [diff] [blame] | 1 | //===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===// | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
| Chris Lattner | 8adcd9f | 2007-12-29 20:37:13 +0000 | [diff] [blame] | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // This file computes an optimal sequence of instructions for doing all shuffles | 
|  | 11 | // of two 4-element vectors.  With a release build and when configured to emit | 
|  | 12 | // an altivec instruction table, this takes about 30s to run on a 2.7GHz | 
|  | 13 | // PowerPC G5. | 
|  | 14 | // | 
|  | 15 | //===----------------------------------------------------------------------===// | 
|  | 16 |  | 
| Chris Lattner | 3db2056 | 2006-04-18 00:21:25 +0000 | [diff] [blame] | 17 | #include <cassert> | 
| Anton Korobeynikov | 579f071 | 2008-02-20 11:08:44 +0000 | [diff] [blame] | 18 | #include <cstdlib> | 
| Chandler Carruth | 91d19d8 | 2012-12-04 10:37:14 +0000 | [diff] [blame] | 19 | #include <iomanip> | 
|  | 20 | #include <iostream> | 
|  | 21 | #include <vector> | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 22 | struct Operator; | 
|  | 23 |  | 
|  | 24 | // Masks are 4-nibble hex numbers.  A value of 0-7 in a nibble means that the | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 25 | // result takes that element of the input vectors (0-3 are the LHS, 4-7 the | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 26 | // RHS).  A value of 8 means the entry is undefined. | 
|  | 27 |  | 
|  | 28 | // Mask manipulation functions. | 
/// MakeMask - Pack four element selectors into a single mask.  V0 ends up in
/// the most significant nibble and V3 in the least significant one.  The
/// arguments are not range-checked.
static inline unsigned short MakeMask(unsigned V0, unsigned V1,
                                      unsigned V2, unsigned V3) {
  // Shift the selectors in one nibble at a time, most significant first.
  unsigned Packed = V0;
  Packed = (Packed << 4) | V1;
  Packed = (Packed << 4) | V2;
  Packed = (Packed << 4) | V3;
  return Packed;
}
|  | 33 |  | 
/// getMaskElt - Return the 4-bit selector stored for element Elt of Mask.
/// Element 0 is the most significant nibble of the low 16 bits, element 3 the
/// least significant one.
static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
  const unsigned Shift = 4 * (3 - Elt);
  const unsigned Shifted = Mask >> Shift;
  return Shifted & 0xF;
}
|  | 38 |  | 
/// setMaskElt - Return a copy of Mask in which the selector for element Elt
/// has been replaced by NewVal.  NewVal is OR'd in unmasked, so it must fit in
/// four bits.
static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
  const unsigned Shift = 4 * (3 - Elt);
  // Clear the old nibble, then drop the new selector into the hole.
  const unsigned Cleared = Mask & ~(0xFu << Shift);
  return Cleared | (NewVal << Shift);
}
|  | 43 |  | 
/// isValidMask - Return true if every nibble of Mask is a legal selector,
/// i.e. an element index 0-7 or the undef marker 8.  Nibble values 9-15 make
/// the mask invalid.
static bool isValidMask(unsigned short Mask) {
  for (unsigned Shift = 0; Shift != 16; Shift += 4) {
    const unsigned Nibble = (Mask >> Shift) & 0xF;
    if (Nibble > 8)
      return false;
  }
  return true;
}
|  | 49 |  | 
/// hasUndefElements - Return true if at least one nibble of the mask has its
/// top bit set, which for a valid mask means the element is undef (8).
static bool hasUndefElements(unsigned short Mask) {
  const unsigned UndefBits = Mask & 0x8888u;
  return UndefBits != 0;
}
|  | 55 |  | 
/// isOnlyLHSMask - Return true if no element of the mask selects from the RHS.
/// Selectors 4-7 are the only legal values with bit 2 set, so testing that bit
/// in every nibble is sufficient; undef elements (8) do not count as RHS uses.
static bool isOnlyLHSMask(unsigned short Mask) {
  const unsigned RHSBits = Mask & 0x4444u;
  return !RHSBits;
}
|  | 61 |  | 
#if 0
/// getLHSOnlyMask - Given a mask that refers to both its LHS and its RHS,
/// rewrite it to refer to the LHS only (for when the same value is passed as
/// both arguments).  Currently compiled out.
static unsigned short getLHSOnlyMask(unsigned short Mask) {
  // Clearing bit 2 of every nibble folds selectors 4-7 onto 0-3 and leaves
  // LHS selectors and undefs (8) untouched.
  const unsigned short KeepBits = 0xBBBB;
  return Mask & KeepBits;
}
#endif
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 70 |  | 
|  | 71 | /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4 | 
|  | 72 | /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9. | 
|  | 73 | static unsigned getCompressedMask(unsigned short Mask) { | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 74 | return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 75 | getMaskElt(Mask, 2)*9     + getMaskElt(Mask, 3); | 
|  | 76 | } | 
|  | 77 |  | 
|  | 78 | static void PrintMask(unsigned i, std::ostream &OS) { | 
|  | 79 | OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0))) | 
|  | 80 | << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1))) | 
|  | 81 | << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2))) | 
|  | 82 | << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3))) | 
|  | 83 | << ">"; | 
|  | 84 | } | 
|  | 85 |  | 
|  | 86 | /// ShuffleVal - This represents a shufflevector operation. | 
|  | 87 | struct ShuffleVal { | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 88 | Operator *Op;   // The Operation used to generate this value. | 
| Ben Craig | 46642ff | 2015-12-14 21:57:05 +0000 | [diff] [blame] | 89 | unsigned Cost;  // Number of instrs used to generate this value. | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 90 | unsigned short Arg0, Arg1;  // Input operands for this value. | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 91 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 92 | ShuffleVal() : Cost(1000000) {} | 
|  | 93 | }; | 
|  | 94 |  | 
|  | 95 |  | 
|  | 96 | /// ShufTab - This is the actual shuffle table that we are trying to generate. | 
|  | 97 | /// | 
|  | 98 | static ShuffleVal ShufTab[65536]; | 
|  | 99 |  | 
|  | 100 | /// TheOperators - All of the operators that this target supports. | 
|  | 101 | static std::vector<Operator*> TheOperators; | 
|  | 102 |  | 
|  | 103 | /// Operator - This is a vector operation that is available for use. | 
|  | 104 | struct Operator { | 
| Ben Craig | 46642ff | 2015-12-14 21:57:05 +0000 | [diff] [blame] | 105 | const char *Name; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 106 | unsigned short ShuffleMask; | 
|  | 107 | unsigned short OpNum; | 
| Anton Korobeynikov | 84e2438 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 108 | unsigned Cost; | 
|  | 109 |  | 
|  | 110 | Operator(unsigned short shufflemask, const char *name, unsigned opnum, | 
|  | 111 | unsigned cost = 1) | 
| Ben Craig | 46642ff | 2015-12-14 21:57:05 +0000 | [diff] [blame] | 112 | :  Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) { | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 113 | TheOperators.push_back(this); | 
|  | 114 | } | 
|  | 115 | ~Operator() { | 
|  | 116 | assert(TheOperators.back() == this); | 
|  | 117 | TheOperators.pop_back(); | 
|  | 118 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 119 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 120 | bool isOnlyLHSOperator() const { | 
|  | 121 | return isOnlyLHSMask(ShuffleMask); | 
|  | 122 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 123 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 124 | const char *getName() const { return Name; } | 
| Anton Korobeynikov | 84e2438 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 125 | unsigned getCost() const { return Cost; } | 
|  | 126 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 127 | unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) { | 
|  | 128 | // Extract the elements from LHSMask and RHSMask, as appropriate. | 
|  | 129 | unsigned Result = 0; | 
|  | 130 | for (unsigned i = 0; i != 4; ++i) { | 
|  | 131 | unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF; | 
|  | 132 | unsigned ResElt; | 
|  | 133 | if (SrcElt < 4) | 
|  | 134 | ResElt = getMaskElt(LHSMask, SrcElt); | 
|  | 135 | else if (SrcElt < 8) | 
|  | 136 | ResElt = getMaskElt(RHSMask, SrcElt-4); | 
|  | 137 | else { | 
|  | 138 | assert(SrcElt == 8 && "Bad src elt!"); | 
|  | 139 | ResElt = 8; | 
|  | 140 | } | 
|  | 141 | Result |= ResElt << (4*i); | 
|  | 142 | } | 
|  | 143 | return Result; | 
|  | 144 | } | 
|  | 145 | }; | 
|  | 146 |  | 
|  | 147 | static const char *getZeroCostOpName(unsigned short Op) { | 
|  | 148 | if (ShufTab[Op].Arg0 == 0x0123) | 
|  | 149 | return "LHS"; | 
|  | 150 | else if (ShufTab[Op].Arg0 == 0x4567) | 
|  | 151 | return "RHS"; | 
|  | 152 | else { | 
|  | 153 | assert(0 && "bad zero cost operation"); | 
|  | 154 | abort(); | 
|  | 155 | } | 
|  | 156 | } | 
|  | 157 |  | 
|  | 158 | static void PrintOperation(unsigned ValNo, unsigned short Vals[]) { | 
|  | 159 | unsigned short ThisOp = Vals[ValNo]; | 
|  | 160 | std::cerr << "t" << ValNo; | 
|  | 161 | PrintMask(ThisOp, std::cerr); | 
|  | 162 | std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "("; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 163 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 164 | if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) { | 
|  | 165 | std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0); | 
|  | 166 | PrintMask(ShufTab[ThisOp].Arg0, std::cerr); | 
|  | 167 | } else { | 
|  | 168 | // Figure out what tmp # it is. | 
|  | 169 | for (unsigned i = 0; ; ++i) | 
|  | 170 | if (Vals[i] == ShufTab[ThisOp].Arg0) { | 
|  | 171 | std::cerr << "t" << i; | 
|  | 172 | break; | 
|  | 173 | } | 
|  | 174 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 175 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 176 | if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) { | 
|  | 177 | std::cerr << ", "; | 
|  | 178 | if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) { | 
|  | 179 | std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1); | 
|  | 180 | PrintMask(ShufTab[ThisOp].Arg1, std::cerr); | 
|  | 181 | } else { | 
|  | 182 | // Figure out what tmp # it is. | 
|  | 183 | for (unsigned i = 0; ; ++i) | 
|  | 184 | if (Vals[i] == ShufTab[ThisOp].Arg1) { | 
|  | 185 | std::cerr << "t" << i; | 
|  | 186 | break; | 
|  | 187 | } | 
|  | 188 | } | 
|  | 189 | } | 
|  | 190 | std::cerr << ")  "; | 
|  | 191 | } | 
|  | 192 |  | 
|  | 193 | static unsigned getNumEntered() { | 
|  | 194 | unsigned Count = 0; | 
|  | 195 | for (unsigned i = 0; i != 65536; ++i) | 
|  | 196 | Count += ShufTab[i].Cost < 100; | 
|  | 197 | return Count; | 
|  | 198 | } | 
|  | 199 |  | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 200 | static void EvaluateOps(unsigned short Elt, unsigned short Vals[], | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 201 | unsigned &NumVals) { | 
|  | 202 | if (ShufTab[Elt].Cost == 0) return; | 
|  | 203 |  | 
|  | 204 | // If this value has already been evaluated, it is free.  FIXME: match undefs. | 
|  | 205 | for (unsigned i = 0, e = NumVals; i != e; ++i) | 
|  | 206 | if (Vals[i] == Elt) return; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 207 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 208 | // Otherwise, get the operands of the value, then add it. | 
|  | 209 | unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1; | 
|  | 210 | if (ShufTab[Arg0].Cost) | 
|  | 211 | EvaluateOps(Arg0, Vals, NumVals); | 
|  | 212 | if (Arg0 != Arg1 && ShufTab[Arg1].Cost) | 
|  | 213 | EvaluateOps(Arg1, Vals, NumVals); | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 214 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 215 | Vals[NumVals++] = Elt; | 
|  | 216 | } | 
|  | 217 |  | 
|  | 218 |  | 
|  | 219 | int main() { | 
|  | 220 | // Seed the table with accesses to the LHS and RHS. | 
|  | 221 | ShufTab[0x0123].Cost = 0; | 
| Craig Topper | 66f09ad | 2014-06-08 22:29:17 +0000 | [diff] [blame] | 222 | ShufTab[0x0123].Op = nullptr; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 223 | ShufTab[0x0123].Arg0 = 0x0123; | 
|  | 224 | ShufTab[0x4567].Cost = 0; | 
| Craig Topper | 66f09ad | 2014-06-08 22:29:17 +0000 | [diff] [blame] | 225 | ShufTab[0x4567].Op = nullptr; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 226 | ShufTab[0x4567].Arg0 = 0x4567; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 227 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 228 | // Seed the first-level of shuffles, shuffles whose inputs are the input to | 
|  | 229 | // the vectorshuffle operation. | 
|  | 230 | bool MadeChange = true; | 
|  | 231 | unsigned OpCount = 0; | 
|  | 232 | while (MadeChange) { | 
|  | 233 | MadeChange = false; | 
|  | 234 | ++OpCount; | 
|  | 235 | std::cerr << "Starting iteration #" << OpCount << " with " | 
|  | 236 | << getNumEntered() << " entries established.\n"; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 237 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 238 | // Scan the table for two reasons: First, compute the maximum cost of any | 
|  | 239 | // operation left in the table.  Second, make sure that values with undefs | 
|  | 240 | // have the cheapest alternative that they match. | 
|  | 241 | unsigned MaxCost = ShufTab[0].Cost; | 
|  | 242 | for (unsigned i = 1; i != 0x8889; ++i) { | 
|  | 243 | if (!isValidMask(i)) continue; | 
|  | 244 | if (ShufTab[i].Cost > MaxCost) | 
|  | 245 | MaxCost = ShufTab[i].Cost; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 246 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 247 | // If this value has an undef, make it be computed the cheapest possible | 
|  | 248 | // way of any of the things that it matches. | 
|  | 249 | if (hasUndefElements(i)) { | 
|  | 250 | // This code is a little bit tricky, so here's the idea: consider some | 
|  | 251 | // permutation, like 7u4u.  To compute the lowest cost for 7u4u, we | 
|  | 252 | // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries.  If | 
|  | 253 | // there are 3 undefs, the number rises to 729 entries we have to scan, | 
|  | 254 | // and for the 4 undef case, we have to scan the whole table. | 
|  | 255 | // | 
|  | 256 | // Instead of doing this huge amount of scanning, we process the table | 
|  | 257 | // entries *in order*, and use the fact that 'u' is 8, larger than any | 
|  | 258 | // valid index.  Given an entry like 7u4u then, we only need to scan | 
|  | 259 | // 7[0-7]4u - 8 entries.  We can get away with this, because we already | 
|  | 260 | // know that each of 704u, 714u, 724u, etc contain the minimum value of | 
|  | 261 | // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively. | 
|  | 262 | unsigned UndefIdx; | 
|  | 263 | if (i & 0x8000) | 
|  | 264 | UndefIdx = 0; | 
|  | 265 | else if (i & 0x0800) | 
|  | 266 | UndefIdx = 1; | 
|  | 267 | else if (i & 0x0080) | 
|  | 268 | UndefIdx = 2; | 
|  | 269 | else if (i & 0x0008) | 
|  | 270 | UndefIdx = 3; | 
|  | 271 | else | 
|  | 272 | abort(); | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 273 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 274 | unsigned MinVal  = i; | 
|  | 275 | unsigned MinCost = ShufTab[i].Cost; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 276 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 277 | // Scan the 8 entries. | 
|  | 278 | for (unsigned j = 0; j != 8; ++j) { | 
|  | 279 | unsigned NewElt = setMaskElt(i, UndefIdx, j); | 
|  | 280 | if (ShufTab[NewElt].Cost < MinCost) { | 
|  | 281 | MinCost = ShufTab[NewElt].Cost; | 
|  | 282 | MinVal = NewElt; | 
|  | 283 | } | 
|  | 284 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 285 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 286 | // If we found something cheaper than what was here before, use it. | 
|  | 287 | if (i != MinVal) { | 
|  | 288 | MadeChange = true; | 
|  | 289 | ShufTab[i] = ShufTab[MinVal]; | 
|  | 290 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 291 | } | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 292 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 293 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 294 | for (unsigned LHS = 0; LHS != 0x8889; ++LHS) { | 
|  | 295 | if (!isValidMask(LHS)) continue; | 
|  | 296 | if (ShufTab[LHS].Cost > 1000) continue; | 
|  | 297 |  | 
|  | 298 | // If nothing involving this operand could possibly be cheaper than what | 
|  | 299 | // we already have, don't consider it. | 
|  | 300 | if (ShufTab[LHS].Cost + 1 >= MaxCost) | 
|  | 301 | continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 302 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 303 | for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) { | 
|  | 304 | Operator *Op = TheOperators[opnum]; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 305 |  | 
|  | 306 | // Evaluate op(LHS,LHS) | 
|  | 307 | unsigned ResultMask = Op->getTransformedMask(LHS, LHS); | 
|  | 308 |  | 
| Anton Korobeynikov | 84e2438 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 309 | unsigned Cost = ShufTab[LHS].Cost + Op->getCost(); | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 310 | if (Cost < ShufTab[ResultMask].Cost) { | 
|  | 311 | ShufTab[ResultMask].Cost = Cost; | 
|  | 312 | ShufTab[ResultMask].Op = Op; | 
|  | 313 | ShufTab[ResultMask].Arg0 = LHS; | 
|  | 314 | ShufTab[ResultMask].Arg1 = LHS; | 
|  | 315 | MadeChange = true; | 
|  | 316 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 317 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 318 | // If this is a two input instruction, include the op(x,y) cases.  If | 
|  | 319 | // this is a one input instruction, skip this. | 
|  | 320 | if (Op->isOnlyLHSOperator()) continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 321 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 322 | for (unsigned RHS = 0; RHS != 0x8889; ++RHS) { | 
|  | 323 | if (!isValidMask(RHS)) continue; | 
|  | 324 | if (ShufTab[RHS].Cost > 1000) continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 325 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 326 | // If nothing involving this operand could possibly be cheaper than | 
|  | 327 | // what we already have, don't consider it. | 
|  | 328 | if (ShufTab[RHS].Cost + 1 >= MaxCost) | 
|  | 329 | continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 330 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 331 |  | 
|  | 332 | // Evaluate op(LHS,RHS) | 
|  | 333 | unsigned ResultMask = Op->getTransformedMask(LHS, RHS); | 
|  | 334 |  | 
|  | 335 | if (ShufTab[ResultMask].Cost <= OpCount || | 
|  | 336 | ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost || | 
|  | 337 | ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost) | 
|  | 338 | continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 339 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 340 | // Figure out the cost to evaluate this, knowing that CSE's only need | 
|  | 341 | // to be evaluated once. | 
|  | 342 | unsigned short Vals[30]; | 
|  | 343 | unsigned NumVals = 0; | 
|  | 344 | EvaluateOps(LHS, Vals, NumVals); | 
|  | 345 | EvaluateOps(RHS, Vals, NumVals); | 
|  | 346 |  | 
| Anton Korobeynikov | 84e2438 | 2009-08-21 12:41:03 +0000 | [diff] [blame] | 347 | unsigned Cost = NumVals + Op->getCost(); | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 348 | if (Cost < ShufTab[ResultMask].Cost) { | 
|  | 349 | ShufTab[ResultMask].Cost = Cost; | 
|  | 350 | ShufTab[ResultMask].Op = Op; | 
|  | 351 | ShufTab[ResultMask].Arg0 = LHS; | 
|  | 352 | ShufTab[ResultMask].Arg1 = RHS; | 
|  | 353 | MadeChange = true; | 
|  | 354 | } | 
|  | 355 | } | 
|  | 356 | } | 
|  | 357 | } | 
|  | 358 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 359 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 360 | std::cerr << "Finished Table has " << getNumEntered() | 
|  | 361 | << " entries established.\n"; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 362 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 363 | unsigned CostArray[10] = { 0 }; | 
|  | 364 |  | 
|  | 365 | // Compute a cost histogram. | 
|  | 366 | for (unsigned i = 0; i != 65536; ++i) { | 
|  | 367 | if (!isValidMask(i)) continue; | 
|  | 368 | if (ShufTab[i].Cost > 9) | 
|  | 369 | ++CostArray[9]; | 
|  | 370 | else | 
|  | 371 | ++CostArray[ShufTab[i].Cost]; | 
|  | 372 | } | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 373 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 374 | for (unsigned i = 0; i != 9; ++i) | 
|  | 375 | if (CostArray[i]) | 
|  | 376 | std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n"; | 
|  | 377 | if (CostArray[9]) | 
|  | 378 | std::cout << "// " << CostArray[9] << " entries have higher cost!\n"; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 379 |  | 
|  | 380 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 381 | // Build up the table to emit. | 
|  | 382 | std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n"; | 
| Chris Lattner | 156c85e | 2006-04-17 00:33:35 +0000 | [diff] [blame] | 383 | std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n"; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 384 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 385 | for (unsigned i = 0; i != 0x8889; ++i) { | 
|  | 386 | if (!isValidMask(i)) continue; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 387 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 388 | // CostSat - The cost of this operation saturated to two bits. | 
|  | 389 | unsigned CostSat = ShufTab[i].Cost; | 
| Chris Lattner | e2e2cc5 | 2006-04-17 05:25:16 +0000 | [diff] [blame] | 390 | if (CostSat > 4) CostSat = 4; | 
|  | 391 | if (CostSat == 0) CostSat = 1; | 
|  | 392 | --CostSat;  // Cost is now between 0-3. | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 393 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 394 | unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0; | 
|  | 395 | assert(OpNum < 16 && "Too few bits to encode operation!"); | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 396 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 397 | unsigned LHS = getCompressedMask(ShufTab[i].Arg0); | 
|  | 398 | unsigned RHS = getCompressedMask(ShufTab[i].Arg1); | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 399 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 400 | // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of | 
|  | 401 | // LHS, and 13 bits of RHS = 32 bits. | 
| Chris Lattner | 3dcfef6 | 2006-04-17 05:05:52 +0000 | [diff] [blame] | 402 | unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 403 |  | 
| Jim Grosbach | dba4775 | 2010-10-14 00:12:49 +0000 | [diff] [blame] | 404 | std::cout << "  " << std::setw(10) << Val << "U, // "; | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 405 | PrintMask(i, std::cout); | 
|  | 406 | std::cout << ": Cost " << ShufTab[i].Cost; | 
|  | 407 | std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy"); | 
|  | 408 | std::cout << " "; | 
|  | 409 | if (ShufTab[ShufTab[i].Arg0].Cost == 0) { | 
|  | 410 | std::cout << getZeroCostOpName(ShufTab[i].Arg0); | 
|  | 411 | } else { | 
|  | 412 | PrintMask(ShufTab[i].Arg0, std::cout); | 
|  | 413 | } | 
|  | 414 |  | 
|  | 415 | if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) { | 
|  | 416 | std::cout << ", "; | 
|  | 417 | if (ShufTab[ShufTab[i].Arg1].Cost == 0) { | 
|  | 418 | std::cout << getZeroCostOpName(ShufTab[i].Arg1); | 
|  | 419 | } else { | 
|  | 420 | PrintMask(ShufTab[i].Arg1, std::cout); | 
|  | 421 | } | 
|  | 422 | } | 
|  | 423 | std::cout << "\n"; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 424 | } | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 425 | std::cout << "  0\n};\n"; | 
|  | 426 |  | 
|  | 427 | if (0) { | 
|  | 428 | // Print out the table. | 
|  | 429 | for (unsigned i = 0; i != 0x8889; ++i) { | 
|  | 430 | if (!isValidMask(i)) continue; | 
|  | 431 | if (ShufTab[i].Cost < 1000) { | 
|  | 432 | PrintMask(i, std::cerr); | 
|  | 433 | std::cerr << " - Cost " << ShufTab[i].Cost << " - "; | 
| Anton Korobeynikov | b539e06 | 2009-08-21 12:39:38 +0000 | [diff] [blame] | 434 |  | 
| Chris Lattner | 2951955 | 2006-04-17 00:30:41 +0000 | [diff] [blame] | 435 | unsigned short Vals[30]; | 
|  | 436 | unsigned NumVals = 0; | 
|  | 437 | EvaluateOps(i, Vals, NumVals); | 
|  | 438 |  | 
|  | 439 | for (unsigned j = 0, e = NumVals; j != e; ++j) | 
|  | 440 | PrintOperation(j, Vals); | 
|  | 441 | std::cerr << "\n"; | 
|  | 442 | } | 
|  | 443 | } | 
|  | 444 | } | 
|  | 445 | } | 
|  | 446 |  | 
|  | 447 |  | 
#ifdef GENERATE_ALTIVEC

//===----------------------------------------------------------------------===//
// Altivec operators.  Everything up to the matching #endif is specific to
// altivec.  The operator objects below register themselves in the order in
// which they are defined.
//===----------------------------------------------------------------------===//

// Note that the opcode numbers here must match those in the PPC backend.
enum {
  OP_COPY      = 0,  // Copy, used for things like <u,u,u,3> to say it is
                     // <0,1,2,3>
  OP_VMRGHW    = 1,
  OP_VMRGLW    = 2,
  OP_VSPLTISW0 = 3,
  OP_VSPLTISW1 = 4,
  OP_VSPLTISW2 = 5,
  OP_VSPLTISW3 = 6,
  OP_VSLDOI4   = 7,
  OP_VSLDOI8   = 8,
  OP_VSLDOI12  = 9
};

/// vmrghw - Produces <0,4,1,5>.
struct vmrghw : Operator {
  vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
};
vmrghw the_vmrghw;

/// vmrglw - Produces <2,6,3,7>.
struct vmrglw : Operator {
  vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
};
vmrglw the_vmrglw;

/// vspltisw - Produces <Elt,Elt,Elt,Elt>.
template<unsigned Elt>
struct vspltisw : Operator {
  vspltisw(const char *N, unsigned Opc)
    : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
};

vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);

/// vsldoi - Produces the four consecutive elements of the LHS:RHS pair that
/// start at element N.
template<unsigned N>
struct vsldoi : Operator {
  vsldoi(const char *Name, unsigned Opc)
    : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {}
};

vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);

#endif
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 500 |  | 
|  | 501 | #define GENERATE_NEON | 
|  | 502 |  | 
|  | 503 | #ifdef GENERATE_NEON | 
|  | 504 | enum { | 
|  | 505 | OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> | 
|  | 506 | OP_VREV, | 
|  | 507 | OP_VDUP0, | 
|  | 508 | OP_VDUP1, | 
|  | 509 | OP_VDUP2, | 
|  | 510 | OP_VDUP3, | 
|  | 511 | OP_VEXT1, | 
|  | 512 | OP_VEXT2, | 
|  | 513 | OP_VEXT3, | 
|  | 514 | OP_VUZPL, // VUZP, left result | 
|  | 515 | OP_VUZPR, // VUZP, right result | 
|  | 516 | OP_VZIPL, // VZIP, left result | 
|  | 517 | OP_VZIPR, // VZIP, right result | 
|  | 518 | OP_VTRNL, // VTRN, left result | 
|  | 519 | OP_VTRNR  // VTRN, right result | 
|  | 520 | }; | 
|  | 521 |  | 
|  | 522 | struct vrev : public Operator { | 
| Tanya Lattner | 48b182c | 2011-05-18 06:42:21 +0000 | [diff] [blame] | 523 | vrev() : Operator(0x1032, "vrev", OP_VREV) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 524 | } the_vrev; | 
|  | 525 |  | 
|  | 526 | template<unsigned Elt> | 
|  | 527 | struct vdup : public Operator { | 
|  | 528 | vdup(const char *N, unsigned Opc) | 
|  | 529 | : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} | 
|  | 530 | }; | 
|  | 531 |  | 
|  | 532 | vdup<0> the_vdup0("vdup0", OP_VDUP0); | 
|  | 533 | vdup<1> the_vdup1("vdup1", OP_VDUP1); | 
|  | 534 | vdup<2> the_vdup2("vdup2", OP_VDUP2); | 
|  | 535 | vdup<3> the_vdup3("vdup3", OP_VDUP3); | 
|  | 536 |  | 
|  | 537 | template<unsigned N> | 
|  | 538 | struct vext : public Operator { | 
|  | 539 | vext(const char *Name, unsigned Opc) | 
|  | 540 | : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { | 
|  | 541 | } | 
|  | 542 | }; | 
|  | 543 |  | 
|  | 544 | vext<1> the_vext1("vext1", OP_VEXT1); | 
|  | 545 | vext<2> the_vext2("vext2", OP_VEXT2); | 
|  | 546 | vext<3> the_vext3("vext3", OP_VEXT3); | 
|  | 547 |  | 
|  | 548 | struct vuzpl : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 549 | vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 550 | } the_vuzpl; | 
|  | 551 |  | 
|  | 552 | struct vuzpr : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 553 | vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 554 | } the_vuzpr; | 
|  | 555 |  | 
|  | 556 | struct vzipl : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 557 | vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 558 | } the_vzipl; | 
|  | 559 |  | 
|  | 560 | struct vzipr : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 561 | vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 562 | } the_vzipr; | 
|  | 563 |  | 
|  | 564 | struct vtrnl : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 565 | vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 566 | } the_vtrnl; | 
|  | 567 |  | 
|  | 568 | struct vtrnr : public Operator { | 
| Anton Korobeynikov | 8493c5d | 2009-09-25 22:52:29 +0000 | [diff] [blame] | 569 | vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {} | 
| Anton Korobeynikov | 9a232f4 | 2009-08-21 12:41:24 +0000 | [diff] [blame] | 570 | } the_vtrnr; | 
|  | 571 |  | 
|  | 572 | #endif |