Daniel Dunbar | 3f6e3ff | 2009-07-11 19:39:44 +0000 | [diff] [blame] | 1 | //===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This tablegen backend emits a target specific matcher for converting parsed |
| 11 | // assembly operands into MCInst structures. |
| 12 | // |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 13 | // The input to the target specific matcher is a list of literal tokens and |
| 14 | // operands. The target specific parser should generally eliminate any syntax |
| 15 | // which is not relevant for matching; for example, comma tokens should have |
| 16 | // already been consumed and eliminated by the parser. Most instructions will |
| 17 | // end up with a single literal token (the instruction name) and some number of |
| 18 | // operands. |
| 19 | // |
| 20 | // Some example inputs, for X86: |
| 21 | // 'addl' (immediate ...) (register ...) |
| 22 | // 'add' (immediate ...) (memory ...) |
| 23 | // 'call' '*' %epc |
| 24 | // |
| 25 | // The assembly matcher is responsible for converting this input into a precise |
| 26 | // machine instruction (i.e., an instruction with a well defined encoding). This |
| 27 | // mapping has several properties which complicate matching: |
| 28 | // |
| 29 | // - It may be ambiguous; many architectures can legally encode particular |
| 30 | // variants of an instruction in different ways (for example, using a smaller |
| 31 | // encoding for small immediates). Such ambiguities should never be |
| 32 | // arbitrarily resolved by the assembler, the assembler is always responsible |
| 33 | // for choosing the "best" available instruction. |
| 34 | // |
| 35 | // - It may depend on the subtarget or the assembler context. Instructions |
| 36 | // which are invalid for the current mode, but otherwise unambiguous (e.g., |
| 37 | // an SSE instruction in a file being assembled for i486) should be accepted |
| 38 | // and rejected by the assembler front end. However, if the proper encoding |
| 39 | // for an instruction is dependent on the assembler context then the matcher |
| 40 | // is responsible for selecting the correct machine instruction for the |
| 41 | // current mode. |
| 42 | // |
| 43 | // The core matching algorithm attempts to exploit the regularity in most |
| 44 | // instruction sets to quickly determine the set of possibly matching |
| 45 | // instructions, and to simplify the generated code. Additionally, this helps |
| 46 | // to ensure that the ambiguities are intentionally resolved by the user. |
| 47 | // |
| 48 | // The matching is divided into two distinct phases: |
| 49 | // |
| 50 | // 1. Classification: Each operand is mapped to the unique set which (a) |
| 51 | // contains it, and (b) is the largest such subset for which a single |
| 52 | // instruction could match all members. |
| 53 | // |
| 54 | // For register classes, we can generate these subgroups automatically. For |
| 55 | // arbitrary operands, we expect the user to define the classes and their |
| 56 | // relations to one another (for example, 8-bit signed immediates as a |
| 57 | // subset of 32-bit immediates). |
| 58 | // |
| 59 | // By partitioning the operands in this way, we guarantee that for any |
| 60 | // tuple of classes, any single instruction must match either all or none |
| 61 | // of the sets of operands which could classify to that tuple. |
| 62 | // |
| 63 | // In addition, the subset relation amongst classes induces a partial order |
| 64 | // on such tuples, which we use to resolve ambiguities. |
| 65 | // |
| 66 | // FIXME: What do we do if a crazy case shows up where this is the wrong |
| 67 | // resolution? |
| 68 | // |
| 69 | // 2. The input can now be treated as a tuple of classes (static tokens are |
| 70 | // simple singleton sets). Each such tuple should generally map to a single |
| 71 | // instruction (we currently ignore cases where this isn't true, whee!!!), |
| 72 | // which we can emit a simple matcher for. |
| 73 | // |
Daniel Dunbar | 3f6e3ff | 2009-07-11 19:39:44 +0000 | [diff] [blame] | 74 | //===----------------------------------------------------------------------===// |
| 75 | |
| 76 | #include "AsmMatcherEmitter.h" |
| 77 | #include "CodeGenTarget.h" |
| 78 | #include "Record.h" |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 79 | #include "llvm/ADT/OwningPtr.h" |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 80 | #include "llvm/ADT/SmallVector.h" |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 81 | #include "llvm/ADT/StringExtras.h" |
| 82 | #include "llvm/Support/CommandLine.h" |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 83 | #include "llvm/Support/Debug.h" |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 84 | #include <list> |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 85 | #include <map> |
| 86 | #include <set> |
Daniel Dunbar | 3f6e3ff | 2009-07-11 19:39:44 +0000 | [diff] [blame] | 87 | using namespace llvm; |
| 88 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 89 | namespace { |
Daniel Dunbar | 62beebc | 2009-08-07 20:33:39 +0000 | [diff] [blame] | 90 | static cl::opt<std::string> |
| 91 | MatchOneInstr("match-one-instr", cl::desc("Match only the named instruction"), |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 92 | cl::init("")); |
| 93 | } |
| 94 | |
/// FlattenVariants - Flatten an .td file assembly string by selecting the
/// variant at index \arg N.
///
/// A variant group is written "{v0|v1|...}". Each group is replaced by its
/// N'th alternative, or by nothing when the group has no alternative at that
/// index. A '{' that directly follows '$' (as in "${dst}") or '\\' (an escaped
/// brace) does not open a group. While looking for the '}' which closes a
/// group, any brace that directly follows a '\\' is ignored, so "\\{" and
/// "\\}" inside a group do not unbalance the nesting count.
///
/// \param AsmString - The assembly string, possibly containing variants.
/// \param N - The index of the variant to select.
/// \return The assembly string with every variant group flattened.
static std::string FlattenVariants(const std::string &AsmString,
                                   unsigned N) {
  const size_t Size = AsmString.size();
  std::string Res;
  size_t Pos = 0; // Start of the part of AsmString not yet copied to Res.

  for (;;) {
    // Find the start of the next variant string. For Pos != 0 the character
    // before Pos is the '}' which closed the previous group, so testing the
    // preceding character is always safe and never rejects a '{' at Pos.
    size_t VariantsStart = Pos;
    for (; VariantsStart != Size; ++VariantsStart)
      if (AsmString[VariantsStart] == '{' &&
          (VariantsStart == 0 || (AsmString[VariantsStart-1] != '$' &&
                                  AsmString[VariantsStart-1] != '\\')))
        break;

    // Add the prefix to the result.
    Res.append(AsmString, Pos, VariantsStart - Pos);
    if (VariantsStart == Size)
      break;

    ++VariantsStart; // Skip the '{'.

    // Scan to the end of the variants string. VariantsEnd is at least 1 here,
    // so looking at the previous character is always in bounds.
    size_t VariantsEnd = VariantsStart;
    unsigned NestedBraces = 1;
    for (; VariantsEnd != Size; ++VariantsEnd) {
      // Escaped braces are literal text, they neither open nor close a group.
      if (AsmString[VariantsEnd-1] == '\\')
        continue;

      if (AsmString[VariantsEnd] == '}') {
        if (--NestedBraces == 0)
          break;
      } else if (AsmString[VariantsEnd] == '{')
        ++NestedBraces;
    }
    assert(VariantsEnd != Size &&
           "Unterminated variants in assembly string!");

    // Select the Nth variant (or empty): skip N '|' separators, then take the
    // text up to the next separator or the end of the group.
    size_t SelBegin = VariantsStart;
    for (unsigned i = 0; i != N && SelBegin != VariantsEnd; ++i) {
      size_t Bar = AsmString.find('|', SelBegin);
      // npos compares greater than any index, so "not found" and "found past
      // the end of this group" are handled alike.
      SelBegin = (Bar < VariantsEnd) ? Bar + 1 : VariantsEnd;
    }
    size_t SelEnd = AsmString.find('|', SelBegin);
    if (SelEnd > VariantsEnd)
      SelEnd = VariantsEnd;
    Res.append(AsmString, SelBegin, SelEnd - SelBegin);

    // In builds without assertions an unterminated group simply runs to the
    // end of the string; there is nothing left to scan in that case.
    if (VariantsEnd == Size)
      break;
    Pos = VariantsEnd + 1; // Continue after the closing '}'.
  }

  return Res;
}
| 142 | |
| 143 | /// TokenizeAsmString - Tokenize a simplified assembly string. |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 144 | static void TokenizeAsmString(const StringRef &AsmString, |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 145 | SmallVectorImpl<StringRef> &Tokens) { |
| 146 | unsigned Prev = 0; |
| 147 | bool InTok = true; |
| 148 | for (unsigned i = 0, e = AsmString.size(); i != e; ++i) { |
| 149 | switch (AsmString[i]) { |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 150 | case '[': |
| 151 | case ']': |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 152 | case '*': |
| 153 | case '!': |
| 154 | case ' ': |
| 155 | case '\t': |
| 156 | case ',': |
| 157 | if (InTok) { |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 158 | Tokens.push_back(AsmString.slice(Prev, i)); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 159 | InTok = false; |
| 160 | } |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 161 | if (!isspace(AsmString[i]) && AsmString[i] != ',') |
| 162 | Tokens.push_back(AsmString.substr(i, 1)); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 163 | Prev = i + 1; |
| 164 | break; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 165 | |
| 166 | case '\\': |
| 167 | if (InTok) { |
| 168 | Tokens.push_back(AsmString.slice(Prev, i)); |
| 169 | InTok = false; |
| 170 | } |
| 171 | ++i; |
| 172 | assert(i != AsmString.size() && "Invalid quoted character"); |
| 173 | Tokens.push_back(AsmString.substr(i, 1)); |
| 174 | Prev = i + 1; |
| 175 | break; |
| 176 | |
| 177 | case '$': { |
| 178 | // If this isn't "${", treat like a normal token. |
| 179 | if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') { |
| 180 | if (InTok) { |
| 181 | Tokens.push_back(AsmString.slice(Prev, i)); |
| 182 | InTok = false; |
| 183 | } |
| 184 | Prev = i; |
| 185 | break; |
| 186 | } |
| 187 | |
| 188 | if (InTok) { |
| 189 | Tokens.push_back(AsmString.slice(Prev, i)); |
| 190 | InTok = false; |
| 191 | } |
| 192 | |
| 193 | StringRef::iterator End = |
| 194 | std::find(AsmString.begin() + i, AsmString.end(), '}'); |
| 195 | assert(End != AsmString.end() && "Missing brace in operand reference!"); |
| 196 | size_t EndPos = End - AsmString.begin(); |
| 197 | Tokens.push_back(AsmString.slice(i, EndPos+1)); |
| 198 | Prev = EndPos + 1; |
| 199 | i = EndPos; |
| 200 | break; |
| 201 | } |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 202 | |
| 203 | default: |
| 204 | InTok = true; |
| 205 | } |
| 206 | } |
| 207 | if (InTok && Prev != AsmString.size()) |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 208 | Tokens.push_back(AsmString.substr(Prev)); |
| 209 | } |
| 210 | |
| 211 | static bool IsAssemblerInstruction(const StringRef &Name, |
| 212 | const CodeGenInstruction &CGI, |
| 213 | const SmallVectorImpl<StringRef> &Tokens) { |
| 214 | // Ignore psuedo ops. |
| 215 | // |
| 216 | // FIXME: This is a hack. |
| 217 | if (const RecordVal *Form = CGI.TheDef->getValue("Form")) |
| 218 | if (Form->getValue()->getAsString() == "Pseudo") |
| 219 | return false; |
| 220 | |
| 221 | // Ignore "PHI" node. |
| 222 | // |
| 223 | // FIXME: This is also a hack. |
| 224 | if (Name == "PHI") |
| 225 | return false; |
| 226 | |
| 227 | // Ignore instructions with no .s string. |
| 228 | // |
| 229 | // FIXME: What are these? |
| 230 | if (CGI.AsmString.empty()) |
| 231 | return false; |
| 232 | |
| 233 | // FIXME: Hack; ignore any instructions with a newline in them. |
| 234 | if (std::find(CGI.AsmString.begin(), |
| 235 | CGI.AsmString.end(), '\n') != CGI.AsmString.end()) |
| 236 | return false; |
| 237 | |
| 238 | // Ignore instructions with attributes, these are always fake instructions for |
| 239 | // simplifying codegen. |
| 240 | // |
| 241 | // FIXME: Is this true? |
| 242 | // |
| 243 | // Also, we ignore instructions which reference the operand multiple times; |
| 244 | // this implies a constraint we would not currently honor. These are |
| 245 | // currently always fake instructions for simplifying codegen. |
| 246 | // |
| 247 | // FIXME: Encode this assumption in the .td, so we can error out here. |
| 248 | std::set<std::string> OperandNames; |
| 249 | for (unsigned i = 1, e = Tokens.size(); i < e; ++i) { |
| 250 | if (Tokens[i][0] == '$' && |
| 251 | std::find(Tokens[i].begin(), |
| 252 | Tokens[i].end(), ':') != Tokens[i].end()) { |
| 253 | DEBUG({ |
| 254 | errs() << "warning: '" << Name << "': " |
| 255 | << "ignoring instruction; operand with attribute '" |
| 256 | << Tokens[i] << "', \n"; |
| 257 | }); |
| 258 | return false; |
| 259 | } |
| 260 | |
| 261 | if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) { |
| 262 | DEBUG({ |
| 263 | errs() << "warning: '" << Name << "': " |
| 264 | << "ignoring instruction; tied operand '" |
| 265 | << Tokens[i] << "', \n"; |
| 266 | }); |
| 267 | return false; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | return true; |
| 272 | } |
| 273 | |
| 274 | namespace { |
| 275 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 276 | /// InstructionInfo - Helper class for storing the necessary information for an |
| 277 | /// instruction which is capable of being matched. |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 278 | struct InstructionInfo { |
| 279 | struct Operand { |
| 280 | enum { |
| 281 | Token, |
| 282 | Class |
| 283 | } Kind; |
| 284 | |
| 285 | struct ClassData { |
| 286 | /// Operand - The tablegen operand this class corresponds to. |
| 287 | const CodeGenInstruction::OperandInfo *Operand; |
| 288 | |
| 289 | /// ClassName - The name of this operand's class. |
| 290 | std::string ClassName; |
| 291 | |
| 292 | /// PredicateMethod - The name of the operand method to test whether the |
| 293 | /// operand matches this class. |
| 294 | std::string PredicateMethod; |
| 295 | |
| 296 | /// RenderMethod - The name of the operand method to add this operand to |
| 297 | /// an MCInst. |
| 298 | std::string RenderMethod; |
| 299 | } AsClass; |
| 300 | }; |
| 301 | |
| 302 | /// InstrName - The target name for this instruction. |
| 303 | std::string InstrName; |
| 304 | |
| 305 | /// Instr - The instruction this matches. |
| 306 | const CodeGenInstruction *Instr; |
| 307 | |
| 308 | /// AsmString - The assembly string for this instruction (with variants |
| 309 | /// removed). |
| 310 | std::string AsmString; |
| 311 | |
| 312 | /// Tokens - The tokenized assembly pattern that this instruction matches. |
| 313 | SmallVector<StringRef, 4> Tokens; |
| 314 | |
| 315 | /// Operands - The operands that this instruction matches. |
| 316 | SmallVector<Operand, 4> Operands; |
| 317 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 318 | /// ConversionFnKind - The enum value which is passed to the generated |
| 319 | /// ConvertToMCInst to convert parsed operands into an MCInst for this |
| 320 | /// function. |
| 321 | std::string ConversionFnKind; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 322 | |
| 323 | public: |
| 324 | void dump(); |
| 325 | }; |
| 326 | |
| 327 | } |
| 328 | |
| 329 | void InstructionInfo::dump() { |
| 330 | errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"' |
| 331 | << ", tokens:["; |
| 332 | for (unsigned i = 0, e = Tokens.size(); i != e; ++i) { |
| 333 | errs() << Tokens[i]; |
| 334 | if (i + 1 != e) |
| 335 | errs() << ", "; |
| 336 | } |
| 337 | errs() << "]\n"; |
| 338 | |
| 339 | for (unsigned i = 0, e = Operands.size(); i != e; ++i) { |
| 340 | Operand &Op = Operands[i]; |
| 341 | errs() << " op[" << i << "] = "; |
| 342 | if (Op.Kind == Operand::Token) { |
| 343 | errs() << '\"' << Tokens[i] << "\"\n"; |
| 344 | continue; |
| 345 | } |
| 346 | |
| 347 | assert(Op.Kind == Operand::Class && "Invalid kind!"); |
| 348 | const CodeGenInstruction::OperandInfo &OI = *Op.AsClass.Operand; |
| 349 | errs() << OI.Name << " " << OI.Rec->getName() |
| 350 | << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n"; |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | static void BuildInstructionInfos(CodeGenTarget &Target, |
| 355 | std::vector<InstructionInfo*> &Infos) { |
| 356 | const std::map<std::string, CodeGenInstruction> &Instructions = |
| 357 | Target.getInstructions(); |
| 358 | |
| 359 | for (std::map<std::string, CodeGenInstruction>::const_iterator |
| 360 | it = Instructions.begin(), ie = Instructions.end(); it != ie; ++it) { |
| 361 | const CodeGenInstruction &CGI = it->second; |
| 362 | |
| 363 | if (!MatchOneInstr.empty() && it->first != MatchOneInstr) |
| 364 | continue; |
| 365 | |
| 366 | OwningPtr<InstructionInfo> II(new InstructionInfo); |
| 367 | |
| 368 | II->InstrName = it->first; |
| 369 | II->Instr = &it->second; |
| 370 | II->AsmString = FlattenVariants(CGI.AsmString, 0); |
| 371 | |
| 372 | TokenizeAsmString(II->AsmString, II->Tokens); |
| 373 | |
| 374 | // Ignore instructions which shouldn't be matched. |
| 375 | if (!IsAssemblerInstruction(it->first, CGI, II->Tokens)) |
| 376 | continue; |
| 377 | |
| 378 | for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) { |
| 379 | StringRef Token = II->Tokens[i]; |
| 380 | |
| 381 | // Check for simple tokens. |
| 382 | if (Token[0] != '$') { |
| 383 | InstructionInfo::Operand Op; |
| 384 | Op.Kind = InstructionInfo::Operand::Token; |
| 385 | II->Operands.push_back(Op); |
| 386 | continue; |
| 387 | } |
| 388 | |
| 389 | // Otherwise this is an operand reference. |
| 390 | InstructionInfo::Operand Op; |
| 391 | Op.Kind = InstructionInfo::Operand::Class; |
| 392 | |
| 393 | StringRef OperandName; |
| 394 | if (Token[1] == '{') |
| 395 | OperandName = Token.substr(2, Token.size() - 3); |
| 396 | else |
| 397 | OperandName = Token.substr(1); |
| 398 | |
| 399 | // Map this token to an operand. FIXME: Move elsewhere. |
| 400 | unsigned Idx; |
| 401 | try { |
| 402 | Idx = CGI.getOperandNamed(OperandName); |
| 403 | } catch(...) { |
| 404 | errs() << "error: unable to find operand: '" << OperandName << "'!\n"; |
| 405 | break; |
| 406 | } |
| 407 | |
| 408 | const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx]; |
| 409 | Op.AsClass.Operand = &OI; |
| 410 | |
| 411 | if (OI.Rec->isSubClassOf("RegisterClass")) { |
| 412 | Op.AsClass.ClassName = "Reg"; |
| 413 | Op.AsClass.PredicateMethod = "isReg"; |
| 414 | Op.AsClass.RenderMethod = "addRegOperands"; |
| 415 | } else if (OI.Rec->isSubClassOf("Operand")) { |
| 416 | // FIXME: This should not be hard coded. |
| 417 | const RecordVal *RV = OI.Rec->getValue("Type"); |
| 418 | |
| 419 | // FIXME: Yet another total hack. |
| 420 | if (RV->getValue()->getAsString() == "iPTR" || |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 421 | OI.Rec->getName() == "i8mem_NOREX" || |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 422 | OI.Rec->getName() == "lea32mem" || |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 423 | OI.Rec->getName() == "lea64mem" || |
| 424 | OI.Rec->getName() == "i128mem" || |
| 425 | OI.Rec->getName() == "sdmem" || |
| 426 | OI.Rec->getName() == "ssmem" || |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 427 | OI.Rec->getName() == "lea64_32mem") { |
| 428 | Op.AsClass.ClassName = "Mem"; |
| 429 | Op.AsClass.PredicateMethod = "isMem"; |
| 430 | Op.AsClass.RenderMethod = "addMemOperands"; |
| 431 | } else { |
| 432 | Op.AsClass.ClassName = "Imm"; |
| 433 | Op.AsClass.PredicateMethod = "isImm"; |
| 434 | Op.AsClass.RenderMethod = "addImmOperands"; |
| 435 | } |
| 436 | } else { |
| 437 | OI.Rec->dump(); |
| 438 | assert(0 && "Unexpected instruction operand record!"); |
| 439 | } |
| 440 | |
| 441 | II->Operands.push_back(Op); |
| 442 | } |
| 443 | |
| 444 | // If we broke out, ignore the instruction. |
| 445 | if (II->Operands.size() != II->Tokens.size()) |
| 446 | continue; |
| 447 | |
| 448 | Infos.push_back(II.take()); |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | static void ConstructConversionFunctions(CodeGenTarget &Target, |
| 453 | std::vector<InstructionInfo*> &Infos, |
| 454 | raw_ostream &OS) { |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 455 | // Write the convert function to a separate stream, so we can drop it after |
| 456 | // the enum. |
| 457 | std::string ConvertFnBody; |
| 458 | raw_string_ostream CvtOS(ConvertFnBody); |
| 459 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 460 | // Function we have already generated. |
| 461 | std::set<std::string> GeneratedFns; |
| 462 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 463 | // Start the unified conversion function. |
| 464 | |
| 465 | CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, " |
| 466 | << "unsigned Opcode,\n" |
| 467 | << " SmallVectorImpl<" |
| 468 | << Target.getName() << "Operand> &Operands) {\n"; |
| 469 | CvtOS << " Inst.setOpcode(Opcode);\n"; |
| 470 | CvtOS << " switch (Kind) {\n"; |
| 471 | CvtOS << " default:\n"; |
| 472 | |
| 473 | // Start the enum, which we will generate inline. |
| 474 | |
| 475 | OS << "// Unified function for converting operants to MCInst instances.\n\n"; |
| 476 | |
| 477 | OS << "namespace {\n\n"; |
| 478 | |
| 479 | OS << "enum ConversionKind {\n"; |
| 480 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 481 | for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(), |
| 482 | ie = Infos.end(); it != ie; ++it) { |
| 483 | InstructionInfo &II = **it; |
| 484 | |
| 485 | // Order the (class) operands by the order to convert them into an MCInst. |
| 486 | SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList; |
| 487 | for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) { |
| 488 | InstructionInfo::Operand &Op = II.Operands[i]; |
| 489 | if (Op.Kind == InstructionInfo::Operand::Class) |
| 490 | MIOperandList.push_back(std::make_pair(Op.AsClass.Operand->MIOperandNo, |
| 491 | i)); |
| 492 | } |
| 493 | std::sort(MIOperandList.begin(), MIOperandList.end()); |
| 494 | |
| 495 | // Compute the total number of operands. |
| 496 | unsigned NumMIOperands = 0; |
| 497 | for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) { |
| 498 | const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i]; |
| 499 | NumMIOperands = std::max(NumMIOperands, |
| 500 | OI.MIOperandNo + OI.MINumOperands); |
| 501 | } |
| 502 | |
| 503 | // Build the conversion function signature. |
| 504 | std::string Signature = "Convert"; |
| 505 | unsigned CurIndex = 0; |
| 506 | for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) { |
| 507 | InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second]; |
| 508 | assert(CurIndex <= Op.AsClass.Operand->MIOperandNo && |
| 509 | "Duplicate match for instruction operand!"); |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 510 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 511 | Signature += "_"; |
| 512 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 513 | // Skip operands which weren't matched by anything, this occurs when the |
| 514 | // .td file encodes "implicit" operands as explicit ones. |
| 515 | // |
| 516 | // FIXME: This should be removed from the MCInst structure. |
| 517 | for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex) |
| 518 | Signature += "Imp"; |
| 519 | |
| 520 | Signature += Op.AsClass.ClassName; |
| 521 | Signature += utostr(Op.AsClass.Operand->MINumOperands); |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 522 | Signature += "_" + utostr(MIOperandList[i].second); |
| 523 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 524 | CurIndex += Op.AsClass.Operand->MINumOperands; |
| 525 | } |
| 526 | |
| 527 | // Add any trailing implicit operands. |
| 528 | for (; CurIndex != NumMIOperands; ++CurIndex) |
| 529 | Signature += "Imp"; |
| 530 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 531 | II.ConversionFnKind = Signature; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 532 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 533 | // Check if we have already generated this signature. |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 534 | if (!GeneratedFns.insert(Signature).second) |
| 535 | continue; |
| 536 | |
| 537 | // If not, emit it now. |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 538 | |
| 539 | // Add to the enum list. |
| 540 | OS << " " << Signature << ",\n"; |
| 541 | |
| 542 | // And to the convert function. |
| 543 | CvtOS << " case " << Signature << ":\n"; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 544 | CurIndex = 0; |
| 545 | for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) { |
| 546 | InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second]; |
| 547 | |
| 548 | // Add the implicit operands. |
| 549 | for (; CurIndex != Op.AsClass.Operand->MIOperandNo; ++CurIndex) |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 550 | CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n"; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 551 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 552 | CvtOS << " Operands[" << MIOperandList[i].second |
| 553 | << "]." << Op.AsClass.RenderMethod |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 554 | << "(Inst, " << Op.AsClass.Operand->MINumOperands << ");\n"; |
| 555 | CurIndex += Op.AsClass.Operand->MINumOperands; |
| 556 | } |
| 557 | |
| 558 | // And add trailing implicit operands. |
| 559 | for (; CurIndex != NumMIOperands; ++CurIndex) |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 560 | CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n"; |
| 561 | CvtOS << " break;\n"; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 562 | } |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 563 | |
| 564 | // Finish the convert function. |
| 565 | |
| 566 | CvtOS << " }\n"; |
| 567 | CvtOS << " return false;\n"; |
| 568 | CvtOS << "}\n\n"; |
| 569 | |
| 570 | // Finish the enum, and drop the convert function after it. |
| 571 | |
| 572 | OS << " NumConversionVariants\n"; |
| 573 | OS << "};\n\n"; |
| 574 | |
| 575 | OS << "}\n\n"; |
| 576 | |
| 577 | OS << CvtOS.str(); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 578 | } |
| 579 | |
Daniel Dunbar | 79f302e | 2009-08-07 21:01:44 +0000 | [diff] [blame] | 580 | /// EmitMatchRegisterName - Emit the function to match a string to the target |
| 581 | /// specific register enum. |
| 582 | static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, |
| 583 | raw_ostream &OS) { |
Daniel Dunbar | 2f9876b | 2009-07-17 18:51:11 +0000 | [diff] [blame] | 584 | const std::vector<CodeGenRegister> &Registers = Target.getRegisters(); |
| 585 | |
Daniel Dunbar | 79f302e | 2009-08-07 21:01:44 +0000 | [diff] [blame] | 586 | OS << "bool " << Target.getName() |
| 587 | << AsmParser->getValueAsString("AsmParserClassName") |
Daniel Dunbar | 85f1b39 | 2009-07-29 00:02:19 +0000 | [diff] [blame] | 588 | << "::MatchRegisterName(const StringRef &Name, unsigned &RegNo) {\n"; |
Daniel Dunbar | 2f9876b | 2009-07-17 18:51:11 +0000 | [diff] [blame] | 589 | |
| 590 | // FIXME: TableGen should have a fast string matcher generator. |
| 591 | for (unsigned i = 0, e = Registers.size(); i != e; ++i) { |
| 592 | const CodeGenRegister &Reg = Registers[i]; |
| 593 | if (Reg.TheDef->getValueAsString("AsmName").empty()) |
| 594 | continue; |
| 595 | |
| 596 | OS << " if (Name == \"" |
| 597 | << Reg.TheDef->getValueAsString("AsmName") << "\")\n" |
| 598 | << " return RegNo=" << i + 1 << ", false;\n"; |
| 599 | } |
| 600 | OS << " return true;\n"; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 601 | OS << "}\n\n"; |
Daniel Dunbar | 79f302e | 2009-08-07 21:01:44 +0000 | [diff] [blame] | 602 | } |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 603 | |
Daniel Dunbar | 79f302e | 2009-08-07 21:01:44 +0000 | [diff] [blame] | 604 | void AsmMatcherEmitter::run(raw_ostream &OS) { |
| 605 | CodeGenTarget Target; |
| 606 | Record *AsmParser = Target.getAsmParser(); |
| 607 | std::string ClassName = AsmParser->getValueAsString("AsmParserClassName"); |
| 608 | |
| 609 | EmitSourceFileHeader("Assembly Matcher Source Fragment", OS); |
| 610 | |
| 611 | // Emit the function to match a register name to number. |
| 612 | EmitMatchRegisterName(Target, AsmParser, OS); |
| 613 | |
| 614 | // Compute the information on the list of instructions to match. |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 615 | std::vector<InstructionInfo*> Infos; |
| 616 | BuildInstructionInfos(Target, Infos); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 617 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 618 | DEBUG_WITH_TYPE("instruction_info", { |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 619 | for (std::vector<InstructionInfo*>::iterator it = Infos.begin(), |
| 620 | ie = Infos.end(); it != ie; ++it) |
| 621 | (*it)->dump(); |
| 622 | }); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 623 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 624 | // FIXME: At this point we should be able to totally order Infos, if not then |
| 625 | // we have an ambiguity which the .td file should be forced to resolve. |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 626 | |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 627 | // Generate the terminal actions to convert operands into an MCInst. |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 628 | ConstructConversionFunctions(Target, Infos, OS); |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 629 | |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 630 | // Build a very stupid version of the match function which just checks each |
| 631 | // instruction in order. |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 632 | |
| 633 | OS << "bool " << Target.getName() << ClassName |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 634 | << "::MatchInstruction(" |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 635 | << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, " |
| 636 | << "MCInst &Inst) {\n"; |
Daniel Dunbar | fe6759e | 2009-08-07 08:26:05 +0000 | [diff] [blame] | 637 | |
| 638 | for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(), |
| 639 | ie = Infos.end(); it != ie; ++it) { |
| 640 | InstructionInfo &II = **it; |
| 641 | |
| 642 | // The parser is expected to arrange things so that each "token" matches |
| 643 | // exactly one target specific operand. |
| 644 | OS << " if (Operands.size() == " << II.Operands.size(); |
| 645 | for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) { |
| 646 | InstructionInfo::Operand &Op = II.Operands[i]; |
| 647 | |
| 648 | OS << " &&\n"; |
| 649 | OS << " "; |
| 650 | |
| 651 | if (Op.Kind == InstructionInfo::Operand::Token) |
| 652 | OS << "Operands[" << i << "].isToken(\"" << II.Tokens[i] << "\")"; |
| 653 | else |
| 654 | OS << "Operands[" << i << "]." |
| 655 | << Op.AsClass.PredicateMethod << "()"; |
| 656 | } |
| 657 | OS << ")\n"; |
Daniel Dunbar | ce82b99 | 2009-08-08 05:24:34 +0000 | [diff] [blame^] | 658 | OS << " return ConvertToMCInst(" << II.ConversionFnKind << ", Inst, " |
| 659 | << Target.getName() << "::" << II.InstrName |
| 660 | << ", Operands);\n\n"; |
Daniel Dunbar | a54716c | 2009-07-31 02:32:59 +0000 | [diff] [blame] | 661 | } |
| 662 | |
| 663 | OS << " return true;\n"; |
| 664 | OS << "}\n\n"; |
Daniel Dunbar | 3f6e3ff | 2009-07-11 19:39:44 +0000 | [diff] [blame] | 665 | } |