Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 1 | //===- Reader.cpp - Code to read bytecode files -----------------------------=== |
| 2 | // |
| 3 | // This library implements the functionality defined in llvm/Bytecode/Reader.h |
| 4 | // |
| 5 | // Note that this library should be as fast as possible, reentrant, and |
| 6 | // threadsafe!! |
| 7 | // |
| 8 | // TODO: Make error message outputs be configurable depending on an option? |
| 9 | // TODO: Allow passing in an option to ignore the symbol table |
| 10 | // |
| 11 | //===------------------------------------------------------------------------=== |
| 12 | |
| 13 | #include "llvm/Bytecode/Reader.h" |
| 14 | #include "llvm/Bytecode/Format.h" |
| 15 | #include "llvm/Module.h" |
| 16 | #include "llvm/BasicBlock.h" |
| 17 | #include "llvm/DerivedTypes.h" |
| 18 | #include "llvm/ConstPoolVals.h" |
| 19 | #include "llvm/iOther.h" |
| 20 | #include "ReaderInternals.h" |
| 21 | #include <sys/types.h> |
| 22 | #include <sys/mman.h> |
| 23 | #include <sys/stat.h> |
| 24 | #include <fcntl.h> |
| 25 | #include <unistd.h> |
| 26 | #include <algorithm> |
| 27 | |
| 28 | bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) { |
| 29 | if (Ty->isPrimitiveType()) { |
| 30 | Slot = Ty->getPrimitiveID(); |
| 31 | } else { |
| 32 | TypeMapType::iterator I = TypeMap.find(Ty); |
| 33 | if (I == TypeMap.end()) return true; // Didn't find type! |
| 34 | Slot = I->second; |
| 35 | } |
| 36 | //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl; |
| 37 | return false; |
| 38 | } |
| 39 | |
| 40 | const Type *BytecodeParser::getType(unsigned ID) { |
| 41 | const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID); |
| 42 | if (T) return T; |
| 43 | |
| 44 | //cerr << "Looking up Type ID: " << ID << endl; |
| 45 | |
| 46 | const Value *D = getValue(Type::TypeTy, ID, false); |
| 47 | if (D == 0) return 0; |
| 48 | |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 49 | assert(D->getType() == Type::TypeTy); |
| 50 | return ((const ConstPoolType*)D->castConstantAsserting())->getValue(); |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 51 | } |
| 52 | |
| 53 | bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) { |
| 54 | unsigned type; |
| 55 | if (getTypeSlot(Def->getType(), type)) return true; |
| 56 | |
| 57 | if (ValueTab.size() <= type) |
| 58 | ValueTab.resize(type+1, ValueList()); |
| 59 | |
| 60 | //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size() |
| 61 | // << "] = " << Def << endl; |
| 62 | |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 63 | if (type == Type::TypeTyID && Def->isConstant()) { |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 64 | const Type *Ty = ((const ConstPoolType*)Def)->getValue(); |
| 65 | unsigned ValueOffset = FirstDerivedTyID; |
| 66 | |
| 67 | if (&ValueTab == &Values) // Take into consideration module level types |
| 68 | ValueOffset += ModuleValues[type].size(); |
| 69 | |
| 70 | if (TypeMap.find(Ty) == TypeMap.end()) |
| 71 | TypeMap[Ty] = ValueTab[type].size()+ValueOffset; |
| 72 | } |
| 73 | |
| 74 | ValueTab[type].push_back(Def); |
| 75 | |
| 76 | return false; |
| 77 | } |
| 78 | |
| 79 | Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) { |
| 80 | unsigned Num = oNum; |
| 81 | unsigned type; // The type plane it lives in... |
| 82 | |
| 83 | if (getTypeSlot(Ty, type)) return 0; // TODO: true |
| 84 | |
| 85 | if (type == Type::TypeTyID) { // The 'type' plane has implicit values |
| 86 | const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num); |
| 87 | if (T) return (Value*)T; // Asked for a primitive type... |
| 88 | |
| 89 | // Otherwise, derived types need offset... |
| 90 | Num -= FirstDerivedTyID; |
| 91 | } |
| 92 | |
| 93 | if (ModuleValues.size() > type) { |
| 94 | if (ModuleValues[type].size() > Num) |
| 95 | return ModuleValues[type][Num]; |
| 96 | Num -= ModuleValues[type].size(); |
| 97 | } |
| 98 | |
| 99 | if (Values.size() > type && Values[type].size() > Num) |
| 100 | return Values[type][Num]; |
| 101 | |
| 102 | if (!Create) return 0; // Do not create a placeholder? |
| 103 | |
| 104 | Value *d = 0; |
| 105 | switch (Ty->getPrimitiveID()) { |
| 106 | case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break; |
| 107 | case Type::MethodTyID: |
| 108 | cerr << "Creating method pholder! : " << type << ":" << oNum << " " |
| 109 | << Ty->getName() << endl; |
| 110 | d = new MethPHolder(Ty, oNum); |
| 111 | insertValue(d, LateResolveModuleValues); |
| 112 | return d; |
| 113 | default: d = new DefPHolder(Ty, oNum); break; |
| 114 | } |
| 115 | |
| 116 | assert(d != 0 && "How did we not make something?"); |
| 117 | if (insertValue(d, LateResolveValues)) return 0; |
| 118 | return d; |
| 119 | } |
| 120 | |
| 121 | bool BytecodeParser::postResolveValues(ValueTable &ValTab) { |
| 122 | bool Error = false; |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 123 | for (unsigned ty = 0; ty < ValTab.size(); ++ty) { |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 124 | ValueList &DL = ValTab[ty]; |
| 125 | unsigned Size; |
| 126 | while ((Size = DL.size())) { |
| 127 | unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]); |
| 128 | |
| 129 | Value *D = DL[Size-1]; |
| 130 | DL.pop_back(); |
| 131 | |
| 132 | Value *NewDef = getValue(D->getType(), IDNumber, false); |
| 133 | if (NewDef == 0) { |
| 134 | Error = true; // Unresolved thinger |
| 135 | cerr << "Unresolvable reference found: <" << D->getType()->getName() |
| 136 | << ">:" << IDNumber << "!\n"; |
| 137 | } else { |
| 138 | // Fixup all of the uses of this placeholder def... |
| 139 | D->replaceAllUsesWith(NewDef); |
| 140 | |
| 141 | // Now that all the uses are gone, delete the placeholder... |
| 142 | // If we couldn't find a def (error case), then leak a little |
| 143 | delete D; // memory, 'cause otherwise we can't remove all uses! |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | return Error; |
| 149 | } |
| 150 | |
| 151 | bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf, |
| 152 | BasicBlock *&BB) { |
| 153 | BB = new BasicBlock(); |
| 154 | |
| 155 | while (Buf < EndBuf) { |
| 156 | Instruction *Def; |
| 157 | if (ParseInstruction(Buf, EndBuf, Def)) { |
| 158 | delete BB; |
| 159 | return true; |
| 160 | } |
| 161 | |
| 162 | if (Def == 0) { delete BB; return true; } |
| 163 | if (insertValue(Def, Values)) { delete BB; return true; } |
| 164 | |
| 165 | BB->getInstList().push_back(Def); |
| 166 | } |
| 167 | |
| 168 | return false; |
| 169 | } |
| 170 | |
| 171 | bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) { |
| 172 | while (Buf < EndBuf) { |
| 173 | // Symtab block header: [num entries][type id number] |
| 174 | unsigned NumEntries, Typ; |
| 175 | if (read_vbr(Buf, EndBuf, NumEntries) || |
| 176 | read_vbr(Buf, EndBuf, Typ)) return true; |
| 177 | const Type *Ty = getType(Typ); |
| 178 | if (Ty == 0) return true; |
| 179 | |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 180 | for (unsigned i = 0; i < NumEntries; ++i) { |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 181 | // Symtab entry: [def slot #][name] |
| 182 | unsigned slot; |
| 183 | if (read_vbr(Buf, EndBuf, slot)) return true; |
| 184 | string Name; |
| 185 | if (read(Buf, EndBuf, Name, false)) // Not aligned... |
| 186 | return true; |
| 187 | |
| 188 | Value *D = getValue(Ty, slot, false); // Find mapping... |
| 189 | if (D == 0) return true; |
| 190 | D->setName(Name); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | return Buf > EndBuf; |
| 195 | } |
| 196 | |
| 197 | |
| 198 | bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf, |
| 199 | Module *C) { |
| 200 | // Clear out the local values table... |
| 201 | Values.clear(); |
| 202 | if (MethodSignatureList.empty()) return true; // Unexpected method! |
| 203 | |
| 204 | const MethodType *MTy = MethodSignatureList.front().first; |
| 205 | unsigned MethSlot = MethodSignatureList.front().second; |
| 206 | MethodSignatureList.pop_front(); |
| 207 | Method *M = new Method(MTy); |
| 208 | |
| 209 | const MethodType::ParamTypes &Params = MTy->getParamTypes(); |
| 210 | for (MethodType::ParamTypes::const_iterator It = Params.begin(); |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 211 | It != Params.end(); ++It) { |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 212 | MethodArgument *MA = new MethodArgument(*It); |
| 213 | if (insertValue(MA, Values)) { delete M; return true; } |
| 214 | M->getArgumentList().push_back(MA); |
| 215 | } |
| 216 | |
| 217 | while (Buf < EndBuf) { |
| 218 | unsigned Type, Size; |
| 219 | const uchar *OldBuf = Buf; |
| 220 | if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; } |
| 221 | |
| 222 | switch (Type) { |
| 223 | case BytecodeFormat::ConstantPool: |
| 224 | if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) { |
| 225 | cerr << "Error reading constant pool!\n"; |
| 226 | delete M; return true; |
| 227 | } |
| 228 | break; |
| 229 | |
| 230 | case BytecodeFormat::BasicBlock: { |
| 231 | BasicBlock *BB; |
| 232 | if (ParseBasicBlock(Buf, Buf+Size, BB) || |
| 233 | insertValue(BB, Values)) { |
| 234 | cerr << "Error parsing basic block!\n"; |
| 235 | delete M; return true; // Parse error... :( |
| 236 | } |
| 237 | |
| 238 | M->getBasicBlocks().push_back(BB); |
| 239 | break; |
| 240 | } |
| 241 | |
| 242 | case BytecodeFormat::SymbolTable: |
| 243 | if (ParseSymbolTable(Buf, Buf+Size)) { |
| 244 | cerr << "Error reading method symbol table!\n"; |
| 245 | delete M; return true; |
| 246 | } |
| 247 | break; |
| 248 | |
| 249 | default: |
| 250 | Buf += Size; |
| 251 | if (OldBuf > Buf) return true; // Wrap around! |
| 252 | break; |
| 253 | } |
| 254 | if (align32(Buf, EndBuf)) { |
| 255 | delete M; // Malformed bc file, read past end of block. |
| 256 | return true; |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | if (postResolveValues(LateResolveValues) || |
| 261 | postResolveValues(LateResolveModuleValues)) { |
| 262 | delete M; return true; // Unresolvable references! |
| 263 | } |
| 264 | |
| 265 | Value *MethPHolder = getValue(MTy, MethSlot, false); |
| 266 | assert(MethPHolder && "Something is broken no placeholder found!"); |
Chris Lattner | 7fc9fe3 | 2001-06-27 23:41:11 +0000 | [diff] [blame] | 267 | assert(MethPHolder->isMethod() && "Not a method?"); |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 268 | |
| 269 | unsigned type; // Type slot |
| 270 | assert(!getTypeSlot(MTy, type) && "How can meth type not exist?"); |
| 271 | getTypeSlot(MTy, type); |
| 272 | |
| 273 | C->getMethodList().push_back(M); |
| 274 | |
| 275 | // Replace placeholder with the real method pointer... |
| 276 | ModuleValues[type][MethSlot] = M; |
| 277 | |
| 278 | // If anyone is using the placeholder make them use the real method instead |
| 279 | MethPHolder->replaceAllUsesWith(M); |
| 280 | |
| 281 | // We don't need the placeholder anymore! |
| 282 | delete MethPHolder; |
| 283 | |
| 284 | return false; |
| 285 | } |
| 286 | |
| 287 | bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, |
| 288 | Module *C) { |
| 289 | |
| 290 | if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks? |
| 291 | |
| 292 | // Read the method signatures for all of the methods that are coming, and |
| 293 | // create fillers in the Value tables. |
| 294 | unsigned MethSignature; |
| 295 | if (read_vbr(Buf, End, MethSignature)) return true; |
| 296 | while (MethSignature != Type::VoidTyID) { // List is terminated by Void |
| 297 | const Type *Ty = getType(MethSignature); |
| 298 | if (!Ty || !Ty->isMethodType()) { |
| 299 | cerr << "Method not meth type! "; |
| 300 | if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl; |
| 301 | return true; |
| 302 | } |
| 303 | |
| 304 | // When the ModuleGlobalInfo section is read, we load the type of each method |
| 305 | // and the 'ModuleValues' slot that it lands in. We then load a placeholder |
| 306 | // into its slot to reserve it. When the method is loaded, this placeholder |
| 307 | // is replaced. |
| 308 | |
| 309 | // Insert the placeholder... |
| 310 | Value *Def = new MethPHolder(Ty, 0); |
| 311 | insertValue(Def, ModuleValues); |
| 312 | |
| 313 | // Figure out which entry of its typeslot it went into... |
| 314 | unsigned TypeSlot; |
| 315 | if (getTypeSlot(Def->getType(), TypeSlot)) return true; |
| 316 | |
| 317 | unsigned SlotNo = ModuleValues[TypeSlot].size()-1; |
| 318 | |
| 319 | // Keep track of this information in a linked list that is emptied as |
| 320 | // methods are loaded... |
| 321 | // |
| 322 | MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo)); |
| 323 | if (read_vbr(Buf, End, MethSignature)) return true; |
| 324 | } |
| 325 | |
| 326 | if (align32(Buf, End)) return true; |
| 327 | |
| 328 | // This is for future proofing... in the future extra fields may be added that |
| 329 | // we don't understand, so we transparently ignore them. |
| 330 | // |
| 331 | Buf = End; |
| 332 | return false; |
| 333 | } |
| 334 | |
| 335 | bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf, |
| 336 | Module *&C) { |
| 337 | |
| 338 | unsigned Type, Size; |
| 339 | if (readBlock(Buf, EndBuf, Type, Size)) return true; |
| 340 | if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) |
| 341 | return true; // Hrm, not a class? |
| 342 | |
| 343 | MethodSignatureList.clear(); // Just in case... |
| 344 | |
| 345 | // Read into instance variables... |
| 346 | if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true; |
| 347 | if (align32(Buf, EndBuf)) return true; |
| 348 | |
| 349 | C = new Module(); |
| 350 | |
| 351 | while (Buf < EndBuf) { |
| 352 | const uchar *OldBuf = Buf; |
| 353 | if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; } |
| 354 | switch (Type) { |
| 355 | case BytecodeFormat::ModuleGlobalInfo: |
| 356 | if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) { |
| 357 | cerr << "Error reading class global info section!\n"; |
| 358 | delete C; return true; |
| 359 | } |
| 360 | break; |
| 361 | |
| 362 | case BytecodeFormat::ConstantPool: |
| 363 | if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) { |
| 364 | cerr << "Error reading class constant pool!\n"; |
| 365 | delete C; return true; |
| 366 | } |
| 367 | break; |
| 368 | |
| 369 | case BytecodeFormat::Method: { |
| 370 | if (ParseMethod(Buf, Buf+Size, C)) { |
| 371 | delete C; return true; // Error parsing method |
| 372 | } |
| 373 | break; |
| 374 | } |
| 375 | |
| 376 | case BytecodeFormat::SymbolTable: |
| 377 | if (ParseSymbolTable(Buf, Buf+Size)) { |
| 378 | cerr << "Error reading class symbol table!\n"; |
| 379 | delete C; return true; |
| 380 | } |
| 381 | break; |
| 382 | |
| 383 | default: |
| 384 | cerr << "Unknown class block: " << Type << endl; |
| 385 | Buf += Size; |
| 386 | if (OldBuf > Buf) return true; // Wrap around! |
| 387 | break; |
| 388 | } |
| 389 | if (align32(Buf, EndBuf)) { delete C; return true; } |
| 390 | } |
| 391 | |
| 392 | if (!MethodSignatureList.empty()) // Expected more methods! |
| 393 | return true; |
| 394 | return false; |
| 395 | } |
| 396 | |
| 397 | Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) { |
| 398 | LateResolveValues.clear(); |
| 399 | unsigned Sig; |
| 400 | // Read and check signature... |
| 401 | if (read(Buf, EndBuf, Sig) || |
| 402 | Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) |
| 403 | return 0; // Invalid signature! |
| 404 | |
| 405 | Module *Result; |
| 406 | if (ParseModule(Buf, EndBuf, Result)) return 0; |
| 407 | return Result; |
| 408 | } |
| 409 | |
| 410 | |
| 411 | Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) { |
| 412 | BytecodeParser Parser; |
| 413 | return Parser.ParseBytecode(Buffer, Buffer+Length); |
| 414 | } |
| 415 | |
| 416 | // Parse and return a class file... |
| 417 | // |
| 418 | Module *ParseBytecodeFile(const string &Filename) { |
| 419 | struct stat StatBuf; |
| 420 | Module *Result = 0; |
| 421 | |
| 422 | if (Filename != string("-")) { // Read from a file... |
Chris Lattner | b49ff5c | 2001-07-23 18:51:23 +0000 | [diff] [blame^] | 423 | int FD = open(Filename.c_str(), O_RDONLY); |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 424 | if (FD == -1) return 0; |
| 425 | |
| 426 | if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; } |
| 427 | |
| 428 | int Length = StatBuf.st_size; |
| 429 | if (Length == 0) { close(FD); return 0; } |
| 430 | uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ, |
| 431 | MAP_PRIVATE, FD, 0); |
| 432 | if (Buffer == (uchar*)-1) { close(FD); return 0; } |
| 433 | |
| 434 | BytecodeParser Parser; |
| 435 | Result = Parser.ParseBytecode(Buffer, Buffer+Length); |
| 436 | |
| 437 | munmap((char*)Buffer, Length); |
| 438 | close(FD); |
| 439 | } else { // Read from stdin |
| 440 | size_t FileSize = 0; |
| 441 | int BlockSize; |
| 442 | uchar Buffer[4096], *FileData = 0; |
| 443 | while ((BlockSize = read(0, Buffer, 4))) { |
| 444 | if (BlockSize == -1) { free(FileData); return 0; } |
| 445 | |
| 446 | FileData = (uchar*)realloc(FileData, FileSize+BlockSize); |
| 447 | memcpy(FileData+FileSize, Buffer, BlockSize); |
| 448 | FileSize += BlockSize; |
| 449 | } |
| 450 | |
| 451 | if (FileSize == 0) { free(FileData); return 0; } |
| 452 | |
| 453 | #define ALIGN_PTRS 1 |
| 454 | #if ALIGN_PTRS |
| 455 | uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE, |
| 456 | MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
| 457 | assert((Buf != (uchar*)-1) && "mmap returned error!"); |
| 458 | free(FileData); |
| 459 | memcpy(Buf, FileData, FileSize); |
| 460 | #else |
| 461 | uchar *Buf = FileData; |
| 462 | #endif |
| 463 | |
| 464 | BytecodeParser Parser; |
| 465 | Result = Parser.ParseBytecode(Buf, Buf+FileSize); |
| 466 | |
| 467 | #if ALIGN_PTRS |
| 468 | munmap((char*)Buf, FileSize); // Free mmap'd data area |
| 469 | #else |
| 470 | free(FileData); // Free realloc'd block of memory |
| 471 | #endif |
| 472 | } |
| 473 | |
| 474 | return Result; |
| 475 | } |