Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 1 | //===- Reader.cpp - Code to read bytecode files ---------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by the LLVM research group and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This library implements the functionality defined in llvm/Bytecode/Reader.h |
| 11 | // |
| 12 | // Note that this library should be as fast as possible, reentrant, and |
| 13 | // threadsafe!! |
| 14 | // |
| 15 | // TODO: Allow passing in an option to ignore the symbol table |
| 16 | // |
| 17 | //===----------------------------------------------------------------------===// |
| 18 | |
| 19 | #include "AnalyzerInternals.h" |
Reid Spencer | 926572c | 2004-06-09 06:14:52 +0000 | [diff] [blame] | 20 | #include "ReaderPrimitives.h" |
Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 21 | #include "llvm/Module.h" |
| 22 | #include "llvm/Bytecode/Format.h" |
| 23 | #include "Support/StringExtras.h" |
| 24 | #include <iostream> |
| 25 | #include <sstream> |
| 26 | |
| 27 | using namespace llvm; |
| 28 | |
Reid Spencer | 926572c | 2004-06-09 06:14:52 +0000 | [diff] [blame] | 29 | // Enable to trace to figure out what the heck is going on when parsing fails |
| 30 | //#define TRACE_LEVEL 10 |
| 31 | //#define DEBUG_OUTPUT |
| 32 | |
// BCR_TRACE(n, X) - Bytecode reader tracing.  When TRACE_LEVEL is defined to a
// non-zero value, X is streamed to std::cerr, indented by 2*n spaces, provided
// that n is below TRACE_LEVEL.  Otherwise the macro expands to nothing.
//
// The enabled form is wrapped in do/while(0) so that it is a single statement
// and cannot capture an 'else' that follows the macro invocation.
#if TRACE_LEVEL    // ByteCodeReading_TRACEr
#define BCR_TRACE(n, X) \
    do { \
      if ((n) < TRACE_LEVEL) std::cerr << std::string((n)*2, ' ') << X; \
    } while (0)
#else
#define BCR_TRACE(n, X)
#endif
| 39 | |
// PARSE_ERROR(inserters) - Format an error message from the given stream
// inserters and pass it to handler->handleError().  If the handler returns
// false, the message is thrown as a std::string; if it returns true, execution
// continues after the macro.
//
// Wrapped in do/while(0) so the expansion is exactly one statement; with a
// bare brace block, "if (c) PARSE_ERROR(x); else ..." would not compile.
#define PARSE_ERROR(inserters) \
  do { \
    std::ostringstream errormsg; \
    errormsg << inserters; \
    if ( ! handler->handleError( errormsg.str() ) ) \
      throw std::string(errormsg.str()); \
  } while (0)
| 47 | |
Reid Spencer | 926572c | 2004-06-09 06:14:52 +0000 | [diff] [blame] | 48 | |
| 49 | inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf, |
| 50 | const unsigned char *EndBuf, |
| 51 | unsigned &Type, unsigned &Size) |
| 52 | { |
| 53 | Type = read(Buf, EndBuf); |
| 54 | Size = read(Buf, EndBuf); |
| 55 | } |
| 56 | |
Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 57 | const Type *AbstractBytecodeParser::getType(unsigned ID) { |
| 58 | //cerr << "Looking up Type ID: " << ID << "\n"; |
| 59 | |
| 60 | if (ID < Type::FirstDerivedTyID) |
| 61 | if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) |
| 62 | return T; // Asked for a primitive type... |
| 63 | |
| 64 | // Otherwise, derived types need offset... |
| 65 | ID -= Type::FirstDerivedTyID; |
| 66 | |
| 67 | if (!CompactionTypeTable.empty()) { |
| 68 | if (ID >= CompactionTypeTable.size()) |
| 69 | PARSE_ERROR("Type ID out of range for compaction table!"); |
| 70 | return CompactionTypeTable[ID]; |
| 71 | } |
| 72 | |
| 73 | // Is it a module-level type? |
| 74 | if (ID < ModuleTypes.size()) |
| 75 | return ModuleTypes[ID].get(); |
| 76 | |
| 77 | // Nope, is it a function-level type? |
| 78 | ID -= ModuleTypes.size(); |
| 79 | if (ID < FunctionTypes.size()) |
| 80 | return FunctionTypes[ID].get(); |
| 81 | |
| 82 | PARSE_ERROR("Illegal type reference!"); |
| 83 | return Type::VoidTy; |
| 84 | } |
| 85 | |
| 86 | bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, |
| 87 | std::vector<unsigned> &Operands) { |
| 88 | Operands.clear(); |
| 89 | unsigned iType = 0; |
| 90 | unsigned Opcode = 0; |
| 91 | unsigned Op = read(Buf, EndBuf); |
| 92 | |
| 93 | // bits Instruction format: Common to all formats |
| 94 | // -------------------------- |
| 95 | // 01-00: Opcode type, fixed to 1. |
| 96 | // 07-02: Opcode |
| 97 | Opcode = (Op >> 2) & 63; |
| 98 | Operands.resize((Op >> 0) & 03); |
| 99 | |
| 100 | switch (Operands.size()) { |
| 101 | case 1: |
| 102 | // bits Instruction format: |
| 103 | // -------------------------- |
| 104 | // 19-08: Resulting type plane |
| 105 | // 31-20: Operand #1 (if set to (2^12-1), then zero operands) |
| 106 | // |
| 107 | iType = (Op >> 8) & 4095; |
| 108 | Operands[0] = (Op >> 20) & 4095; |
| 109 | if (Operands[0] == 4095) // Handle special encoding for 0 operands... |
| 110 | Operands.resize(0); |
| 111 | break; |
| 112 | case 2: |
| 113 | // bits Instruction format: |
| 114 | // -------------------------- |
| 115 | // 15-08: Resulting type plane |
| 116 | // 23-16: Operand #1 |
| 117 | // 31-24: Operand #2 |
| 118 | // |
| 119 | iType = (Op >> 8) & 255; |
| 120 | Operands[0] = (Op >> 16) & 255; |
| 121 | Operands[1] = (Op >> 24) & 255; |
| 122 | break; |
| 123 | case 3: |
| 124 | // bits Instruction format: |
| 125 | // -------------------------- |
| 126 | // 13-08: Resulting type plane |
| 127 | // 19-14: Operand #1 |
| 128 | // 25-20: Operand #2 |
| 129 | // 31-26: Operand #3 |
| 130 | // |
| 131 | iType = (Op >> 8) & 63; |
| 132 | Operands[0] = (Op >> 14) & 63; |
| 133 | Operands[1] = (Op >> 20) & 63; |
| 134 | Operands[2] = (Op >> 26) & 63; |
| 135 | break; |
| 136 | case 0: |
| 137 | Buf -= 4; // Hrm, try this again... |
| 138 | Opcode = read_vbr_uint(Buf, EndBuf); |
| 139 | Opcode >>= 2; |
| 140 | iType = read_vbr_uint(Buf, EndBuf); |
| 141 | |
| 142 | unsigned NumOperands = read_vbr_uint(Buf, EndBuf); |
| 143 | Operands.resize(NumOperands); |
| 144 | |
| 145 | if (NumOperands == 0) |
| 146 | PARSE_ERROR("Zero-argument instruction found; this is invalid."); |
| 147 | |
| 148 | for (unsigned i = 0; i != NumOperands; ++i) |
| 149 | Operands[i] = read_vbr_uint(Buf, EndBuf); |
| 150 | align32(Buf, EndBuf); |
| 151 | break; |
| 152 | } |
| 153 | |
| 154 | return handler->handleInstruction(Opcode, getType(iType), Operands); |
| 155 | } |
| 156 | |
| 157 | /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one |
| 158 | /// basicblock at a time. This method reads in one of the basicblock packets. |
| 159 | void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, |
| 160 | BufPtr EndBuf, |
| 161 | unsigned BlockNo) { |
| 162 | handler->handleBasicBlockBegin( BlockNo ); |
| 163 | |
| 164 | std::vector<unsigned> Args; |
| 165 | bool is_terminating = false; |
| 166 | while (Buf < EndBuf) |
| 167 | is_terminating = ParseInstruction(Buf, EndBuf, Args); |
| 168 | |
| 169 | if ( ! is_terminating ) |
| 170 | PARSE_ERROR( |
| 171 | "Failed to recognize instruction as terminating at end of block"); |
| 172 | |
| 173 | handler->handleBasicBlockEnd( BlockNo ); |
| 174 | } |
| 175 | |
| 176 | |
| 177 | /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the |
| 178 | /// body of a function. In post 1.0 bytecode files, we no longer emit basic |
| 179 | /// block individually, in order to avoid per-basic-block overhead. |
Reid Spencer | 5e8868d | 2004-06-08 05:54:47 +0000 | [diff] [blame] | 180 | unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, |
| 181 | BufPtr EndBuf) { |
Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 182 | unsigned BlockNo = 0; |
| 183 | std::vector<unsigned> Args; |
| 184 | |
| 185 | while (Buf < EndBuf) { |
| 186 | handler->handleBasicBlockBegin( BlockNo ); |
| 187 | |
| 188 | // Read instructions into this basic block until we get to a terminator |
| 189 | bool is_terminating = false; |
| 190 | while (Buf < EndBuf && !is_terminating ) |
| 191 | is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; |
| 192 | |
| 193 | if (!is_terminating) |
| 194 | PARSE_ERROR( "Non-terminated basic block found!"); |
| 195 | |
| 196 | handler->handleBasicBlockEnd( BlockNo ); |
| 197 | ++BlockNo; |
| 198 | } |
| 199 | return BlockNo; |
| 200 | } |
| 201 | |
| 202 | void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { |
| 203 | handler->handleSymbolTableBegin(); |
| 204 | |
| 205 | while (Buf < EndBuf) { |
| 206 | // Symtab block header: [num entries][type id number] |
| 207 | unsigned NumEntries = read_vbr_uint(Buf, EndBuf); |
| 208 | unsigned Typ = read_vbr_uint(Buf, EndBuf); |
| 209 | const Type *Ty = getType(Typ); |
| 210 | |
| 211 | handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); |
| 212 | |
| 213 | for (unsigned i = 0; i != NumEntries; ++i) { |
| 214 | // Symtab entry: [def slot #][name] |
| 215 | unsigned slot = read_vbr_uint(Buf, EndBuf); |
| 216 | std::string Name = read_str(Buf, EndBuf); |
| 217 | |
| 218 | if (Typ == Type::TypeTyID) |
| 219 | handler->handleSymbolTableType( i, slot, Name ); |
| 220 | else |
| 221 | handler->handleSymbolTableValue( i, slot, Name ); |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | if (Buf > EndBuf) |
| 226 | PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); |
| 227 | |
| 228 | handler->handleSymbolTableEnd(); |
| 229 | } |
| 230 | |
| 231 | void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { |
| 232 | if (FunctionSignatureList.empty()) |
| 233 | throw std::string("FunctionSignatureList empty!"); |
| 234 | |
| 235 | const Type *FType = FunctionSignatureList.back(); |
| 236 | FunctionSignatureList.pop_back(); |
| 237 | |
| 238 | // Save the information for future reading of the function |
| 239 | LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); |
| 240 | // Pretend we've `parsed' this function |
| 241 | Buf = EndBuf; |
| 242 | } |
| 243 | |
| 244 | void AbstractBytecodeParser::ParseNextFunction(Type* FType) { |
| 245 | // Find {start, end} pointers and slot in the map. If not there, we're done. |
| 246 | LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType); |
| 247 | |
| 248 | // Make sure we found it |
| 249 | if ( Fi == LazyFunctionLoadMap.end() ) { |
| 250 | PARSE_ERROR("Unrecognized function of type " << FType->getDescription()); |
| 251 | return; |
| 252 | } |
| 253 | |
| 254 | BufPtr Buf = Fi->second.Buf; |
| 255 | BufPtr EndBuf = Fi->second.EndBuf; |
| 256 | assert(Fi->first == FType); |
| 257 | |
| 258 | LazyFunctionLoadMap.erase(Fi); |
| 259 | |
| 260 | this->ParseFunctionBody( FType, Buf, EndBuf ); |
| 261 | } |
| 262 | |
| 263 | void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, |
| 264 | BufPtr &Buf, BufPtr EndBuf ) { |
| 265 | |
| 266 | GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; |
| 267 | |
| 268 | unsigned LinkageType = read_vbr_uint(Buf, EndBuf); |
| 269 | switch (LinkageType) { |
| 270 | case 0: Linkage = GlobalValue::ExternalLinkage; break; |
| 271 | case 1: Linkage = GlobalValue::WeakLinkage; break; |
| 272 | case 2: Linkage = GlobalValue::AppendingLinkage; break; |
| 273 | case 3: Linkage = GlobalValue::InternalLinkage; break; |
| 274 | case 4: Linkage = GlobalValue::LinkOnceLinkage; break; |
| 275 | default: |
| 276 | PARSE_ERROR("Invalid linkage type for Function."); |
| 277 | Linkage = GlobalValue::InternalLinkage; |
| 278 | break; |
| 279 | } |
| 280 | |
| 281 | handler->handleFunctionBegin(FType,Linkage); |
| 282 | |
| 283 | // Keep track of how many basic blocks we have read in... |
| 284 | unsigned BlockNum = 0; |
| 285 | bool InsertedArguments = false; |
| 286 | |
| 287 | while (Buf < EndBuf) { |
| 288 | unsigned Type, Size; |
| 289 | BufPtr OldBuf = Buf; |
| 290 | readBlock(Buf, EndBuf, Type, Size); |
| 291 | |
| 292 | switch (Type) { |
| 293 | case BytecodeFormat::ConstantPool: |
| 294 | ParseConstantPool(Buf, Buf+Size, FunctionTypes ); |
| 295 | break; |
| 296 | |
| 297 | case BytecodeFormat::CompactionTable: |
| 298 | ParseCompactionTable(Buf, Buf+Size); |
| 299 | break; |
| 300 | |
| 301 | case BytecodeFormat::BasicBlock: |
| 302 | ParseBasicBlock(Buf, Buf+Size, BlockNum++); |
| 303 | break; |
| 304 | |
| 305 | case BytecodeFormat::InstructionList: |
| 306 | if (BlockNum) |
| 307 | PARSE_ERROR("InstructionList must come before basic blocks!"); |
| 308 | BlockNum = ParseInstructionList(Buf, Buf+Size); |
| 309 | break; |
| 310 | |
| 311 | case BytecodeFormat::SymbolTable: |
| 312 | ParseSymbolTable(Buf, Buf+Size ); |
| 313 | break; |
| 314 | |
| 315 | default: |
| 316 | Buf += Size; |
| 317 | if (OldBuf > Buf) |
| 318 | PARSE_ERROR("Wrapped around reading bytecode"); |
| 319 | break; |
| 320 | } |
| 321 | |
| 322 | // Malformed bc file if read past end of block. |
| 323 | align32(Buf, EndBuf); |
| 324 | } |
| 325 | |
| 326 | handler->handleFunctionEnd(FType); |
| 327 | |
| 328 | // Clear out function-level types... |
| 329 | FunctionTypes.clear(); |
| 330 | CompactionTypeTable.clear(); |
| 331 | } |
| 332 | |
| 333 | void AbstractBytecodeParser::ParseAllFunctionBodies() { |
| 334 | LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); |
| 335 | LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); |
| 336 | |
| 337 | while ( Fi != Fe ) { |
| 338 | const Type* FType = Fi->first; |
| 339 | this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { |
| 344 | |
| 345 | handler->handleCompactionTableBegin(); |
| 346 | |
| 347 | while (Buf != End) { |
| 348 | unsigned NumEntries = read_vbr_uint(Buf, End); |
| 349 | unsigned Ty; |
| 350 | |
| 351 | if ((NumEntries & 3) == 3) { |
| 352 | NumEntries >>= 2; |
| 353 | Ty = read_vbr_uint(Buf, End); |
| 354 | } else { |
| 355 | Ty = NumEntries >> 2; |
| 356 | NumEntries &= 3; |
| 357 | } |
| 358 | |
| 359 | handler->handleCompactionTablePlane( Ty, NumEntries ); |
| 360 | |
| 361 | if (Ty == Type::TypeTyID) { |
| 362 | for (unsigned i = 0; i != NumEntries; ++i) { |
| 363 | unsigned TypeSlot = read_vbr_uint(Buf,End); |
| 364 | const Type *Typ = getGlobalTableType(TypeSlot); |
| 365 | handler->handleCompactionTableType( i, TypeSlot, Typ ); |
| 366 | } |
| 367 | } else { |
| 368 | const Type *Typ = getType(Ty); |
| 369 | // Push the implicit zero |
| 370 | for (unsigned i = 0; i != NumEntries; ++i) { |
| 371 | unsigned ValSlot = read_vbr_uint(Buf, End); |
| 372 | handler->handleCompactionTableValue( i, ValSlot, Typ ); |
| 373 | } |
| 374 | } |
| 375 | } |
| 376 | handler->handleCompactionTableEnd(); |
| 377 | } |
| 378 | |
| 379 | const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, |
| 380 | const unsigned char *EndBuf) { |
| 381 | unsigned PrimType = read_vbr_uint(Buf, EndBuf); |
| 382 | |
| 383 | const Type *Val = 0; |
| 384 | if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) |
| 385 | return Val; |
| 386 | |
| 387 | switch (PrimType) { |
| 388 | case Type::FunctionTyID: { |
| 389 | const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); |
| 390 | |
| 391 | unsigned NumParams = read_vbr_uint(Buf, EndBuf); |
| 392 | |
| 393 | std::vector<const Type*> Params; |
| 394 | while (NumParams--) |
| 395 | Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); |
| 396 | |
| 397 | bool isVarArg = Params.size() && Params.back() == Type::VoidTy; |
| 398 | if (isVarArg) Params.pop_back(); |
| 399 | |
| 400 | Type* result = FunctionType::get(RetType, Params, isVarArg); |
| 401 | handler->handleType( result ); |
| 402 | return result; |
| 403 | } |
| 404 | case Type::ArrayTyID: { |
| 405 | unsigned ElTyp = read_vbr_uint(Buf, EndBuf); |
| 406 | const Type *ElementType = getType(ElTyp); |
| 407 | |
| 408 | unsigned NumElements = read_vbr_uint(Buf, EndBuf); |
| 409 | |
| 410 | BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" |
| 411 | << NumElements << "\n"); |
| 412 | Type* result = ArrayType::get(ElementType, NumElements); |
| 413 | handler->handleType( result ); |
| 414 | return result; |
| 415 | } |
| 416 | case Type::StructTyID: { |
| 417 | std::vector<const Type*> Elements; |
| 418 | unsigned Typ = read_vbr_uint(Buf, EndBuf); |
| 419 | while (Typ) { // List is terminated by void/0 typeid |
| 420 | Elements.push_back(getType(Typ)); |
| 421 | Typ = read_vbr_uint(Buf, EndBuf); |
| 422 | } |
| 423 | |
| 424 | Type* result = StructType::get(Elements); |
| 425 | handler->handleType( result ); |
| 426 | return result; |
| 427 | } |
| 428 | case Type::PointerTyID: { |
| 429 | unsigned ElTyp = read_vbr_uint(Buf, EndBuf); |
| 430 | BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); |
| 431 | Type* result = PointerType::get(getType(ElTyp)); |
| 432 | handler->handleType( result ); |
| 433 | return result; |
| 434 | } |
| 435 | |
| 436 | case Type::OpaqueTyID: { |
| 437 | Type* result = OpaqueType::get(); |
| 438 | handler->handleType( result ); |
| 439 | return result; |
| 440 | } |
| 441 | |
| 442 | default: |
| 443 | PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n"); |
| 444 | return Val; |
| 445 | } |
| 446 | } |
| 447 | |
| 448 | // ParseTypeConstants - We have to use this weird code to handle recursive |
| 449 | // types. We know that recursive types will only reference the current slab of |
| 450 | // values in the type plane, but they can forward reference types before they |
| 451 | // have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might |
| 452 | // be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix |
| 453 | // this ugly problem, we pessimistically insert an opaque type for each type we |
| 454 | // are about to read. This means that forward references will resolve to |
| 455 | // something and when we reread the type later, we can replace the opaque type |
| 456 | // with a new resolved concrete type. |
| 457 | // |
| 458 | void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, |
| 459 | const unsigned char *EndBuf, |
| 460 | TypeListTy &Tab, |
| 461 | unsigned NumEntries) { |
| 462 | assert(Tab.size() == 0 && "should not have read type constants in before!"); |
| 463 | |
| 464 | // Insert a bunch of opaque types to be resolved later... |
| 465 | Tab.reserve(NumEntries); |
| 466 | for (unsigned i = 0; i != NumEntries; ++i) |
| 467 | Tab.push_back(OpaqueType::get()); |
| 468 | |
| 469 | // Loop through reading all of the types. Forward types will make use of the |
| 470 | // opaque types just inserted. |
| 471 | // |
| 472 | for (unsigned i = 0; i != NumEntries; ++i) { |
| 473 | const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); |
| 474 | if (NewTy == 0) throw std::string("Couldn't parse type!"); |
| 475 | BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << |
| 476 | "' Replacing: " << OldTy << "\n"); |
| 477 | |
| 478 | // Don't insertValue the new type... instead we want to replace the opaque |
| 479 | // type with the new concrete value... |
| 480 | // |
| 481 | |
| 482 | // Refine the abstract type to the new type. This causes all uses of the |
| 483 | // abstract type to use NewTy. This also will cause the opaque type to be |
| 484 | // deleted... |
| 485 | // |
| 486 | cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy); |
| 487 | |
| 488 | // This should have replace the old opaque type with the new type in the |
| 489 | // value table... or with a preexisting type that was already in the system |
| 490 | assert(Tab[i] != OldTy && "refineAbstractType didn't work!"); |
| 491 | } |
| 492 | |
| 493 | BCR_TRACE(5, "Resulting types:\n"); |
| 494 | for (unsigned i = 0; i < NumEntries; ++i) { |
| 495 | BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n"); |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | |
| 500 | void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, |
| 501 | const unsigned char *EndBuf, |
| 502 | unsigned TypeID) { |
| 503 | |
| 504 | // We must check for a ConstantExpr before switching by type because |
| 505 | // a ConstantExpr can be of any type, and has no explicit value. |
| 506 | // |
| 507 | // 0 if not expr; numArgs if is expr |
| 508 | unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); |
| 509 | |
| 510 | if (isExprNumArgs) { |
| 511 | unsigned Opcode = read_vbr_uint(Buf, EndBuf); |
| 512 | const Type* Typ = getType(TypeID); |
| 513 | |
| 514 | // FIXME: Encoding of constant exprs could be much more compact! |
| 515 | std::vector<std::pair<const Type*,unsigned> > ArgVec; |
| 516 | ArgVec.reserve(isExprNumArgs); |
| 517 | |
| 518 | // Read the slot number and types of each of the arguments |
| 519 | for (unsigned i = 0; i != isExprNumArgs; ++i) { |
| 520 | unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); |
| 521 | unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); |
| 522 | BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) |
| 523 | << "' slot: " << ArgValSlot << "\n"); |
| 524 | |
| 525 | // Get the arg value from its slot if it exists, otherwise a placeholder |
| 526 | ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot)); |
| 527 | } |
| 528 | |
| 529 | handler->handleConstantExpression( Opcode, Typ, ArgVec ); |
| 530 | return; |
| 531 | } |
| 532 | |
| 533 | // Ok, not an ConstantExpr. We now know how to read the given type... |
| 534 | const Type *Ty = getType(TypeID); |
| 535 | switch (Ty->getPrimitiveID()) { |
| 536 | case Type::BoolTyID: { |
| 537 | unsigned Val = read_vbr_uint(Buf, EndBuf); |
| 538 | if (Val != 0 && Val != 1) |
| 539 | PARSE_ERROR("Invalid boolean value read."); |
| 540 | |
| 541 | handler->handleConstantValue( ConstantBool::get(Val == 1)); |
| 542 | break; |
| 543 | } |
| 544 | |
| 545 | case Type::UByteTyID: // Unsigned integer types... |
| 546 | case Type::UShortTyID: |
| 547 | case Type::UIntTyID: { |
| 548 | unsigned Val = read_vbr_uint(Buf, EndBuf); |
| 549 | if (!ConstantUInt::isValueValidForType(Ty, Val)) |
| 550 | throw std::string("Invalid unsigned byte/short/int read."); |
| 551 | handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); |
| 552 | break; |
| 553 | } |
| 554 | |
| 555 | case Type::ULongTyID: { |
| 556 | handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); |
| 557 | break; |
| 558 | } |
| 559 | |
| 560 | case Type::SByteTyID: // Signed integer types... |
| 561 | case Type::ShortTyID: |
| 562 | case Type::IntTyID: { |
| 563 | case Type::LongTyID: |
| 564 | int64_t Val = read_vbr_int64(Buf, EndBuf); |
| 565 | if (!ConstantSInt::isValueValidForType(Ty, Val)) |
| 566 | throw std::string("Invalid signed byte/short/int/long read."); |
| 567 | handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); |
| 568 | break; |
| 569 | } |
| 570 | |
| 571 | case Type::FloatTyID: { |
| 572 | float F; |
| 573 | input_data(Buf, EndBuf, &F, &F+1); |
| 574 | handler->handleConstantValue( ConstantFP::get(Ty, F) ); |
| 575 | break; |
| 576 | } |
| 577 | |
| 578 | case Type::DoubleTyID: { |
| 579 | double Val; |
| 580 | input_data(Buf, EndBuf, &Val, &Val+1); |
| 581 | handler->handleConstantValue( ConstantFP::get(Ty, Val) ); |
| 582 | break; |
| 583 | } |
| 584 | |
| 585 | case Type::TypeTyID: |
| 586 | PARSE_ERROR("Type constants shouldn't live in constant table!"); |
| 587 | break; |
| 588 | |
| 589 | case Type::ArrayTyID: { |
| 590 | const ArrayType *AT = cast<ArrayType>(Ty); |
| 591 | unsigned NumElements = AT->getNumElements(); |
| 592 | std::vector<unsigned> Elements; |
| 593 | Elements.reserve(NumElements); |
| 594 | while (NumElements--) // Read all of the elements of the constant. |
| 595 | Elements.push_back(read_vbr_uint(Buf, EndBuf)); |
| 596 | |
| 597 | handler->handleConstantArray( AT, Elements ); |
| 598 | break; |
| 599 | } |
| 600 | |
| 601 | case Type::StructTyID: { |
| 602 | const StructType *ST = cast<StructType>(Ty); |
| 603 | std::vector<unsigned> Elements; |
| 604 | Elements.reserve(ST->getNumElements()); |
| 605 | for (unsigned i = 0; i != ST->getNumElements(); ++i) |
| 606 | Elements.push_back(read_vbr_uint(Buf, EndBuf)); |
| 607 | |
| 608 | handler->handleConstantStruct( ST, Elements ); |
| 609 | } |
| 610 | |
| 611 | case Type::PointerTyID: { // ConstantPointerRef value... |
| 612 | const PointerType *PT = cast<PointerType>(Ty); |
| 613 | unsigned Slot = read_vbr_uint(Buf, EndBuf); |
| 614 | handler->handleConstantPointer( PT, Slot ); |
| 615 | } |
| 616 | |
| 617 | default: |
| 618 | PARSE_ERROR("Don't know how to deserialize constant value of type '"+ |
| 619 | Ty->getDescription()); |
| 620 | } |
| 621 | } |
| 622 | |
| 623 | void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, |
| 624 | const unsigned char *EndBuf) { |
| 625 | ParseConstantPool(Buf, EndBuf, ModuleTypes); |
| 626 | } |
| 627 | |
| 628 | void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, |
| 629 | const unsigned char *EndBuf, |
| 630 | unsigned NumEntries ){ |
| 631 | for (; NumEntries; --NumEntries) { |
| 632 | unsigned Typ = read_vbr_uint(Buf, EndBuf); |
| 633 | const Type *Ty = getType(Typ); |
| 634 | if (!isa<ArrayType>(Ty)) |
| 635 | throw std::string("String constant data invalid!"); |
| 636 | |
| 637 | const ArrayType *ATy = cast<ArrayType>(Ty); |
| 638 | if (ATy->getElementType() != Type::SByteTy && |
| 639 | ATy->getElementType() != Type::UByteTy) |
| 640 | throw std::string("String constant data invalid!"); |
| 641 | |
| 642 | // Read character data. The type tells us how long the string is. |
| 643 | char Data[ATy->getNumElements()]; |
| 644 | input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); |
| 645 | |
| 646 | std::vector<Constant*> Elements(ATy->getNumElements()); |
| 647 | if (ATy->getElementType() == Type::SByteTy) |
| 648 | for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) |
| 649 | Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]); |
| 650 | else |
| 651 | for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) |
| 652 | Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]); |
| 653 | |
| 654 | // Create the constant, inserting it as needed. |
| 655 | ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) ); |
| 656 | handler->handleConstantString( C ); |
| 657 | } |
| 658 | } |
| 659 | |
| 660 | |
| 661 | void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, |
| 662 | const unsigned char *EndBuf, |
| 663 | TypeListTy &TypeTab) { |
| 664 | while (Buf < EndBuf) { |
| 665 | unsigned NumEntries = read_vbr_uint(Buf, EndBuf); |
| 666 | unsigned Typ = read_vbr_uint(Buf, EndBuf); |
| 667 | if (Typ == Type::TypeTyID) { |
| 668 | ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); |
| 669 | } else if (Typ == Type::VoidTyID) { |
| 670 | ParseStringConstants(Buf, EndBuf, NumEntries); |
| 671 | } else { |
| 672 | BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " |
| 673 | << NumEntries << "\n"); |
| 674 | |
| 675 | for (unsigned i = 0; i < NumEntries; ++i) { |
| 676 | ParseConstantValue(Buf, EndBuf, Typ); |
| 677 | } |
| 678 | } |
| 679 | } |
| 680 | |
| 681 | if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); |
| 682 | } |
| 683 | |
| 684 | void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { |
| 685 | |
| 686 | handler->handleModuleGlobalsBegin(); |
| 687 | |
| 688 | // Read global variables... |
| 689 | unsigned VarType = read_vbr_uint(Buf, End); |
| 690 | while (VarType != Type::VoidTyID) { // List is terminated by Void |
| 691 | // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = |
| 692 | // Linkage, bit4+ = slot# |
| 693 | unsigned SlotNo = VarType >> 5; |
| 694 | unsigned LinkageID = (VarType >> 2) & 7; |
| 695 | bool isConstant = VarType & 1; |
| 696 | bool hasInitializer = VarType & 2; |
| 697 | GlobalValue::LinkageTypes Linkage; |
| 698 | |
| 699 | switch (LinkageID) { |
| 700 | case 0: Linkage = GlobalValue::ExternalLinkage; break; |
| 701 | case 1: Linkage = GlobalValue::WeakLinkage; break; |
| 702 | case 2: Linkage = GlobalValue::AppendingLinkage; break; |
| 703 | case 3: Linkage = GlobalValue::InternalLinkage; break; |
| 704 | case 4: Linkage = GlobalValue::LinkOnceLinkage; break; |
| 705 | default: |
| 706 | PARSE_ERROR("Unknown linkage type: " << LinkageID); |
| 707 | Linkage = GlobalValue::InternalLinkage; |
| 708 | break; |
| 709 | } |
| 710 | |
| 711 | const Type *Ty = getType(SlotNo); |
| 712 | if ( !Ty ) { |
| 713 | PARSE_ERROR("Global has no type! SlotNo=" << SlotNo); |
| 714 | } |
| 715 | |
| 716 | if ( !isa<PointerType>(Ty)) { |
| 717 | PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription()); |
| 718 | } |
| 719 | |
| 720 | const Type *ElTy = cast<PointerType>(Ty)->getElementType(); |
| 721 | |
| 722 | // Create the global variable... |
Reid Spencer | 5e8868d | 2004-06-08 05:54:47 +0000 | [diff] [blame] | 723 | if (hasInitializer) { |
Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 724 | unsigned initSlot = read_vbr_uint(Buf,End); |
| 725 | handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); |
Reid Spencer | 5e8868d | 2004-06-08 05:54:47 +0000 | [diff] [blame] | 726 | } else |
| 727 | handler->handleGlobalVariable( ElTy, isConstant, Linkage ); |
Reid Spencer | dac69c8 | 2004-06-07 17:53:43 +0000 | [diff] [blame] | 728 | |
| 729 | // Get next item |
| 730 | VarType = read_vbr_uint(Buf, End); |
| 731 | } |
| 732 | |
| 733 | // Read the function objects for all of the functions that are coming |
| 734 | unsigned FnSignature = read_vbr_uint(Buf, End); |
| 735 | while (FnSignature != Type::VoidTyID) { // List is terminated by Void |
| 736 | const Type *Ty = getType(FnSignature); |
| 737 | if (!isa<PointerType>(Ty) || |
| 738 | !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) { |
| 739 | PARSE_ERROR( "Function not a pointer to function type! Ty = " + |
| 740 | Ty->getDescription()); |
| 741 | // FIXME: what should Ty be if handler continues? |
| 742 | } |
| 743 | |
| 744 | // We create functions by passing the underlying FunctionType to create... |
| 745 | Ty = cast<PointerType>(Ty)->getElementType(); |
| 746 | |
| 747 | // Save this for later so we know type of lazily instantiated functions |
| 748 | FunctionSignatureList.push_back(Ty); |
| 749 | |
| 750 | handler->handleFunctionDeclaration(Ty); |
| 751 | |
| 752 | // Get Next function signature |
| 753 | FnSignature = read_vbr_uint(Buf, End); |
| 754 | } |
| 755 | |
| 756 | if (hasInconsistentModuleGlobalInfo) |
| 757 | align32(Buf, End); |
| 758 | |
| 759 | // This is for future proofing... in the future extra fields may be added that |
| 760 | // we don't understand, so we transparently ignore them. |
| 761 | // |
| 762 | Buf = End; |
| 763 | |
| 764 | handler->handleModuleGlobalsEnd(); |
| 765 | } |
| 766 | |
| 767 | void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { |
| 768 | unsigned Version = read_vbr_uint(Buf, EndBuf); |
| 769 | |
| 770 | // Unpack version number: low four bits are for flags, top bits = version |
| 771 | Module::Endianness Endianness; |
| 772 | Module::PointerSize PointerSize; |
| 773 | Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian; |
| 774 | PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32; |
| 775 | |
| 776 | bool hasNoEndianness = Version & 4; |
| 777 | bool hasNoPointerSize = Version & 8; |
| 778 | |
| 779 | RevisionNum = Version >> 4; |
| 780 | |
| 781 | // Default values for the current bytecode version |
| 782 | hasInconsistentModuleGlobalInfo = false; |
| 783 | hasExplicitPrimitiveZeros = false; |
| 784 | hasRestrictedGEPTypes = false; |
| 785 | |
| 786 | switch (RevisionNum) { |
| 787 | case 0: // LLVM 1.0, 1.1 release version |
| 788 | // Base LLVM 1.0 bytecode format. |
| 789 | hasInconsistentModuleGlobalInfo = true; |
| 790 | hasExplicitPrimitiveZeros = true; |
| 791 | // FALL THROUGH |
| 792 | case 1: // LLVM 1.2 release version |
| 793 | // LLVM 1.2 added explicit support for emitting strings efficiently. |
| 794 | |
| 795 | // Also, it fixed the problem where the size of the ModuleGlobalInfo block |
| 796 | // included the size for the alignment at the end, where the rest of the |
| 797 | // blocks did not. |
| 798 | |
| 799 | // LLVM 1.2 and before required that GEP indices be ubyte constants for |
| 800 | // structures and longs for sequential types. |
| 801 | hasRestrictedGEPTypes = true; |
| 802 | |
| 803 | // FALL THROUGH |
| 804 | case 2: // LLVM 1.3 release version |
| 805 | break; |
| 806 | |
| 807 | default: |
| 808 | PARSE_ERROR("Unknown bytecode version number: " << RevisionNum); |
| 809 | } |
| 810 | |
| 811 | if (hasNoEndianness) Endianness = Module::AnyEndianness; |
| 812 | if (hasNoPointerSize) PointerSize = Module::AnyPointerSize; |
| 813 | |
| 814 | handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); |
| 815 | } |
| 816 | |
| 817 | void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { |
| 818 | unsigned Type, Size; |
| 819 | readBlock(Buf, EndBuf, Type, Size); |
| 820 | if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) |
| 821 | // Hrm, not a class? |
| 822 | PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << |
| 823 | ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); |
| 824 | |
| 825 | // Read into instance variables... |
| 826 | ParseVersionInfo(Buf, EndBuf); |
| 827 | align32(Buf, EndBuf); |
| 828 | |
| 829 | bool SeenModuleGlobalInfo = false; |
| 830 | bool SeenGlobalTypePlane = false; |
| 831 | while (Buf < EndBuf) { |
| 832 | BufPtr OldBuf = Buf; |
| 833 | readBlock(Buf, EndBuf, Type, Size); |
| 834 | |
| 835 | switch (Type) { |
| 836 | |
| 837 | case BytecodeFormat::GlobalTypePlane: |
| 838 | if ( SeenGlobalTypePlane ) |
| 839 | PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); |
| 840 | |
| 841 | ParseGlobalTypes(Buf, Buf+Size); |
| 842 | SeenGlobalTypePlane = true; |
| 843 | break; |
| 844 | |
| 845 | case BytecodeFormat::ModuleGlobalInfo: |
| 846 | if ( SeenModuleGlobalInfo ) |
| 847 | PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); |
| 848 | ParseModuleGlobalInfo(Buf, Buf+Size); |
| 849 | SeenModuleGlobalInfo = true; |
| 850 | break; |
| 851 | |
| 852 | case BytecodeFormat::ConstantPool: |
| 853 | ParseConstantPool(Buf, Buf+Size, ModuleTypes); |
| 854 | break; |
| 855 | |
| 856 | case BytecodeFormat::Function: |
| 857 | ParseFunctionLazily(Buf, Buf+Size); |
| 858 | break; |
| 859 | |
| 860 | case BytecodeFormat::SymbolTable: |
| 861 | ParseSymbolTable(Buf, Buf+Size ); |
| 862 | break; |
| 863 | |
| 864 | default: |
| 865 | Buf += Size; |
| 866 | if (OldBuf > Buf) |
| 867 | { |
| 868 | PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); |
| 869 | } |
| 870 | break; |
| 871 | } |
| 872 | align32(Buf, EndBuf); |
| 873 | } |
| 874 | } |
| 875 | |
| 876 | void AbstractBytecodeParser::ParseBytecode( |
| 877 | BufPtr Buf, unsigned Length, |
| 878 | const std::string &ModuleID) { |
| 879 | |
| 880 | handler->handleStart(); |
| 881 | unsigned char *EndBuf = (unsigned char*)(Buf + Length); |
| 882 | |
| 883 | // Read and check signature... |
| 884 | unsigned Sig = read(Buf, EndBuf); |
| 885 | if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { |
| 886 | PARSE_ERROR("Invalid bytecode signature: " << Sig); |
| 887 | } |
| 888 | |
| 889 | handler->handleModuleBegin(ModuleID); |
| 890 | |
| 891 | this->ParseModule(Buf, EndBuf); |
| 892 | |
| 893 | handler->handleModuleEnd(ModuleID); |
| 894 | |
| 895 | handler->handleFinish(); |
| 896 | } |
| 897 | |
| 898 | // vim: sw=2 |