Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 1 | //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// |
John Criswell | b576c94 | 2003-10-20 19:43:21 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 5 | // This file was developed by Reid Spencer and is distributed under the |
| 6 | // University of Illinois Open Source License. See LICENSE.TXT for details. |
John Criswell | b576c94 | 2003-10-20 19:43:21 +0000 | [diff] [blame] | 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 9 | // |
// Reads standard unix archive files (.a) containing LLVM bytecode.
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 14 | #include "ArchiveInternals.h" |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 15 | #include "llvm/Bytecode/Reader.h" |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 16 | |
Chris Lattner | 3446ae8 | 2004-01-10 19:00:15 +0000 | [diff] [blame] | 17 | using namespace llvm; |
Brian Gaeke | d0fde30 | 2003-11-11 22:41:34 +0000 | [diff] [blame] | 18 | |
namespace {

/// Read a variable-bit-rate (VBR) encoded unsigned integer.
///
/// Every input byte contributes its low seven bits to the result, least
/// significant group first; a set high bit (0x80) means another byte follows.
///
/// @param At   Current read position; advanced past the bytes consumed.
/// @param End  One past the last readable byte.
/// @returns the decoded value.
/// @throws std::string if the input ends before the terminating byte, or if
///         the encoding has more 7-bit groups than an unsigned can hold.
inline unsigned readInteger(const char*&At, const char*End) {
  // Shifting an unsigned by its full width (or more) is undefined behaviour,
  // so an over-long encoding has to be rejected instead of decoded.
  const unsigned MaxShift = sizeof(unsigned) * 8;
  unsigned Shift = 0;
  unsigned Result = 0;

  do {
    if (At == End)
      throw std::string("Ran out of data reading vbr_uint!");
    if (Shift >= MaxShift)
      throw std::string("vbr_uint is too large!");
    Result |= (unsigned)((*At++) & 0x7F) << Shift;
    Shift += 7;
  } while (At[-1] & 0x80);  // high bit of the byte just read: more to come
  return Result;
}

}
| 36 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 37 | // Completely parse the Archive's symbol table and populate symTab member var. |
| 38 | void |
| 39 | Archive::parseSymbolTable(const void* data, unsigned size) { |
| 40 | const char* At = (const char*) data; |
| 41 | const char* End = At + size; |
| 42 | while (At < End) { |
| 43 | unsigned offset = readInteger(At, End); |
| 44 | unsigned length = readInteger(At, End); |
| 45 | if (At + length > End) |
| 46 | throw std::string("malformed symbol table"); |
| 47 | // we don't care if it can't be inserted (duplicate entry) |
| 48 | symTab.insert(std::make_pair(std::string(At,length),offset)); |
| 49 | At += length; |
| 50 | } |
| 51 | symTabSize = size; |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 52 | } |
| 53 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 54 | // This member parses an ArchiveMemberHeader that is presumed to be pointed to |
| 55 | // by At. The At pointer is updated to the byte just after the header, which |
| 56 | // can be variable in size. |
| 57 | ArchiveMember* |
| 58 | Archive::parseMemberHeader(const char*& At, const char* End) { |
| 59 | assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data"); |
| 60 | |
| 61 | // Cast archive member header |
| 62 | ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; |
| 63 | At += sizeof(ArchiveMemberHeader); |
| 64 | |
| 65 | // Instantiate the ArchiveMember to be filled |
| 66 | ArchiveMember* member = new ArchiveMember(this); |
| 67 | |
| 68 | // Extract the size and determine if the file is |
| 69 | // compressed or not (negative length). |
| 70 | int flags = 0; |
| 71 | int MemberSize = atoi(Hdr->size); |
| 72 | if (MemberSize < 0) { |
| 73 | flags |= ArchiveMember::CompressedFlag; |
| 74 | MemberSize = -MemberSize; |
| 75 | } |
| 76 | |
| 77 | // Check the size of the member for sanity |
| 78 | if (At + MemberSize > End) |
| 79 | throw std::string("invalid member length in archive file"); |
| 80 | |
| 81 | // Check the member signature |
| 82 | if (!Hdr->checkSignature()) |
| 83 | throw std::string("invalid file member signature"); |
| 84 | |
| 85 | // Convert and check the member name |
| 86 | // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol |
| 87 | // table. The special name "//" and 14 blanks is for a string table, used |
| 88 | // for long file names. This library doesn't generate either of those but |
| 89 | // it will accept them. If the name starts with #1/ and the remainder is |
| 90 | // digits, then those digits specify the length of the name that is |
| 91 | // stored immediately following the header. The special name |
| 92 | // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode. |
| 93 | // Anything else is a regular, short filename that is terminated with |
| 94 | // a '/' and blanks. |
| 95 | |
| 96 | std::string pathname; |
| 97 | unsigned index; |
| 98 | switch (Hdr->name[0]) { |
| 99 | case '#': |
| 100 | if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { |
| 101 | if (isdigit(Hdr->name[3])) { |
| 102 | unsigned len = atoi(&Hdr->name[3]); |
| 103 | pathname.assign(At,len); |
| 104 | At += len + 1; // terminated by \n |
| 105 | flags |= ArchiveMember::HasLongFilenameFlag; |
| 106 | } else |
| 107 | throw std::string("invalid long filename"); |
| 108 | } else if (Hdr->name[1] == '_' && |
| 109 | (0==memcmp(Hdr->name,ARFILE_LLVM_SYMTAB_NAME,16))) { |
| 110 | // The member is using a long file name (>15 chars) format. |
| 111 | // This format is standard for 4.4BSD and Mac OSX operating |
| 112 | // systems. LLVM uses it similarly. In this format, the |
| 113 | // remainder of the name field (after #1/) specifies the |
| 114 | // length of the file name which occupy the first bytes of |
| 115 | // the member's data. The pathname already has the #1/ stripped. |
| 116 | pathname.assign(ARFILE_LLVM_SYMTAB_NAME); |
| 117 | flags |= ArchiveMember::LLVMSymbolTableFlag; |
| 118 | } |
| 119 | break; |
| 120 | case '/': |
| 121 | if (Hdr->name[1]== '/') { |
| 122 | if (0==memcmp(Hdr->name,ARFILE_STRTAB_NAME,16)) { |
| 123 | pathname.assign(ARFILE_STRTAB_NAME); |
| 124 | flags |= ArchiveMember::StringTableFlag; |
| 125 | } else { |
| 126 | throw std::string("invalid string table name"); |
| 127 | } |
| 128 | } else if (Hdr->name[1] == ' ') { |
| 129 | if (0==memcmp(Hdr->name,ARFILE_SYMTAB_NAME,16)) { |
| 130 | pathname.assign(ARFILE_SYMTAB_NAME); |
| 131 | flags |= ArchiveMember::ForeignSymbolTableFlag; |
| 132 | } else { |
| 133 | throw std::string("invalid foreign symbol table name"); |
| 134 | } |
| 135 | } else if (isdigit(Hdr->name[1])) { |
| 136 | unsigned index = atoi(&Hdr->name[1]); |
| 137 | if (index < strtab.length()) { |
| 138 | const char* namep = strtab.c_str() + index; |
| 139 | const char* endp = strtab.c_str() + strtab.length(); |
| 140 | const char* p = namep; |
| 141 | const char* last_p = p; |
| 142 | while (p < endp) { |
| 143 | if (*p == '\n' && *last_p == '/') { |
| 144 | pathname.assign(namep,last_p-namep); |
| 145 | flags |= ArchiveMember::HasLongFilenameFlag; |
| 146 | break; |
| 147 | } |
| 148 | last_p = p; |
| 149 | p++; |
| 150 | } |
| 151 | if (p >= endp) |
| 152 | throw std::string("missing name termiantor in string table"); |
| 153 | } else { |
| 154 | throw std::string("name index beyond string table"); |
| 155 | } |
| 156 | } |
| 157 | break; |
| 158 | |
| 159 | default: |
| 160 | char* slash = (char*) memchr(Hdr->name,'/',16); |
| 161 | if (slash == 0) |
| 162 | throw std::string("missing name terminator"); |
| 163 | pathname.assign(Hdr->name,slash-Hdr->name); |
| 164 | break; |
| 165 | } |
| 166 | |
| 167 | // Determine if this is a bytecode file |
| 168 | switch (sys::IdentifyFileType(At,4)) { |
| 169 | case sys::BytecodeFileType: |
| 170 | flags |= ArchiveMember::BytecodeFlag; |
| 171 | break; |
| 172 | case sys::CompressedBytecodeFileType: |
| 173 | flags |= ArchiveMember::CompressedBytecodeFlag; |
| 174 | flags &= ~ArchiveMember::CompressedFlag; |
| 175 | break; |
| 176 | default: |
| 177 | flags &= ~(ArchiveMember::BytecodeFlag| |
| 178 | ArchiveMember::CompressedBytecodeFlag); |
| 179 | break; |
| 180 | } |
| 181 | |
| 182 | // Fill in fields of the ArchiveMember |
| 183 | member->next = 0; |
| 184 | member->prev = 0; |
| 185 | member->parent = this; |
| 186 | member->path.setFile(pathname); |
| 187 | member->info.fileSize = MemberSize; |
| 188 | member->info.modTime.fromEpochTime(atoi(Hdr->date)); |
| 189 | sscanf(Hdr->mode,"%o",&(member->info.mode)); |
| 190 | member->info.user = atoi(Hdr->uid); |
| 191 | member->info.group = atoi(Hdr->gid); |
| 192 | member->flags = flags; |
| 193 | member->data = At; |
| 194 | |
| 195 | return member; |
| 196 | } |
| 197 | |
| 198 | void |
| 199 | Archive::checkSignature() { |
| 200 | // Check the magic string at file's header |
| 201 | if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC,8)) |
| 202 | throw std::string("invalid signature for an archive file"); |
| 203 | } |
| 204 | |
| 205 | // This function loads the entire archive and fully populates its ilist with |
| 206 | // the members of the archive file. This is typically used in preparation for |
| 207 | // editing the contents of the archive. |
| 208 | void |
| 209 | Archive::loadArchive() { |
| 210 | |
| 211 | // Set up parsing |
| 212 | members.clear(); |
| 213 | symTab.clear(); |
| 214 | const char *At = base; |
| 215 | const char *End = base + mapfile->size(); |
| 216 | |
| 217 | checkSignature(); |
| 218 | At += 8; // Skip the magic string. |
| 219 | |
| 220 | bool seenSymbolTable = false; |
| 221 | bool foundFirstFile = false; |
| 222 | while (At < End) { |
| 223 | // parse the member header |
| 224 | const char* Save = At; |
| 225 | ArchiveMember* mbr = parseMemberHeader(At, End); |
| 226 | |
| 227 | // check if this is the foreign symbol table |
| 228 | if (mbr->isForeignSymbolTable()) { |
| 229 | // We don't do anything with this but delete it |
| 230 | At += mbr->getSize(); |
| 231 | delete mbr; |
| 232 | if ((int(At) & 1) == 1) |
| 233 | At++; |
| 234 | } else if (mbr->isStringTable()) { |
| 235 | strtab.assign(At,mbr->getSize()); |
| 236 | At += mbr->getSize(); |
| 237 | if ((int(At) & 1) == 1) |
| 238 | At++; |
| 239 | delete mbr; |
| 240 | } else if (mbr->isLLVMSymbolTable()) { |
| 241 | if (seenSymbolTable) |
| 242 | throw std::string("invalid archive: multiple symbol tables"); |
| 243 | parseSymbolTable(mbr->getData(),mbr->getSize()); |
| 244 | seenSymbolTable = true; |
| 245 | At += mbr->getSize(); |
| 246 | if ((int(At) & 1) == 1) |
| 247 | At++; |
| 248 | delete mbr; |
| 249 | } else { |
| 250 | if (!foundFirstFile) { |
| 251 | firstFileOffset = Save - base; |
| 252 | foundFirstFile = true; |
| 253 | } |
| 254 | members.push_back(mbr); |
| 255 | At += mbr->getSize(); |
| 256 | if ((int(At) & 1) == 1) |
| 257 | At++; |
| 258 | } |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | // Open and completely load the archive file. |
| 263 | Archive* |
| 264 | Archive::OpenAndLoad(const sys::Path& file) { |
| 265 | |
| 266 | Archive* result = new Archive(file,true); |
| 267 | |
| 268 | result->loadArchive(); |
| 269 | |
| 270 | return result; |
| 271 | } |
| 272 | |
| 273 | // Get all the bytecode modules from the archive |
| 274 | bool |
| 275 | Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { |
| 276 | |
| 277 | for (iterator I=begin(), E=end(); I != E; ++I) { |
| 278 | if (I->isBytecode() || I->isCompressedBytecode()) { |
| 279 | Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), |
| 280 | I->getSize(), I->getPath().get(), ErrMessage); |
| 281 | if (!M) |
| 282 | return true; |
| 283 | |
| 284 | Modules.push_back(M); |
| 285 | } |
| 286 | } |
Brian Gaeke | 2c61d7b | 2003-11-16 23:08:48 +0000 | [diff] [blame] | 287 | return false; |
| 288 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 289 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 290 | // Load just the symbol table from the archive file |
| 291 | void |
| 292 | Archive::loadSymbolTable() { |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 293 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 294 | // Set up parsing |
| 295 | members.clear(); |
| 296 | symTab.clear(); |
| 297 | const char *At = base; |
| 298 | const char *End = base + mapfile->size(); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 299 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 300 | // Make sure we're dealing with an archive |
| 301 | checkSignature(); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 302 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 303 | At += 8; // Skip signature |
| 304 | |
| 305 | // Parse the first file member header |
| 306 | const char* FirstFile = At; |
| 307 | ArchiveMember* mbr = parseMemberHeader(At, End); |
| 308 | |
| 309 | if (mbr->isForeignSymbolTable()) { |
| 310 | // Skip the foreign symbol table, we don't do anything with it |
| 311 | At += mbr->getSize(); |
| 312 | delete mbr; |
| 313 | |
| 314 | // See if there's a string table too |
| 315 | FirstFile = At; |
| 316 | mbr = parseMemberHeader(At,End); |
| 317 | if (mbr->isStringTable()) { |
| 318 | strtab.assign((const char*)mbr->getData(),mbr->getSize()); |
| 319 | At += mbr->getSize(); |
| 320 | delete mbr; |
| 321 | FirstFile = At; |
| 322 | mbr = parseMemberHeader(At,End); |
Brian Gaeke | 2c61d7b | 2003-11-16 23:08:48 +0000 | [diff] [blame] | 323 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 324 | } |
| 325 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 326 | // See if its the symbol table |
| 327 | if (mbr->isLLVMSymbolTable()) { |
| 328 | parseSymbolTable(mbr->getData(),mbr->getSize()); |
| 329 | FirstFile = At + mbr->getSize(); |
| 330 | if (mbr->getSize() % 2 != 0) |
| 331 | FirstFile++; |
| 332 | } else { |
| 333 | // There's no symbol table in the file. We have to rebuild it from scratch |
| 334 | // because the intent of this method is to get the symbol table loaded so |
| 335 | // it can be searched efficiently. |
| 336 | // Add the member to the members list |
| 337 | members.push_back(mbr); |
| 338 | } |
| 339 | |
| 340 | firstFileOffset = FirstFile - base; |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 341 | } |
| 342 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 343 | // Open the archive and load just the symbol tables |
| 344 | Archive* |
| 345 | Archive::OpenAndLoadSymbols(const sys::Path& file) { |
| 346 | Archive* result = new Archive(file,true); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 347 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 348 | result->loadSymbolTable(); |
Chris Lattner | b70abe1 | 2003-12-30 07:40:35 +0000 | [diff] [blame] | 349 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 350 | return result; |
| 351 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 352 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 353 | // Look up one symbol in the symbol table and return a ModuleProvider for the |
| 354 | // module that defines that symbol. |
| 355 | ModuleProvider* |
| 356 | Archive::findModuleDefiningSymbol(const std::string& symbol) { |
| 357 | SymTabType::iterator SI = symTab.find(symbol); |
| 358 | if (SI == symTab.end()) |
| 359 | return 0; |
| 360 | |
| 361 | // The symbol table was previously constructed assuming that the members were |
| 362 | // written without the symbol table header. Because VBR encoding is used, the |
| 363 | // values could not be adjusted to account for the offset of the symbol table |
| 364 | // because that could affect the size of the symbol table due to VBR encoding. |
| 365 | // We now have to account for this by adjusting the offset by the size of the |
| 366 | // symbol table and its header. |
| 367 | unsigned fileOffset = |
| 368 | SI->second + // offset in symbol-table-less file |
| 369 | firstFileOffset; // add offset to first "real" file in archive |
| 370 | |
| 371 | // See if the module is already loaded |
| 372 | ModuleMap::iterator MI = modules.find(fileOffset); |
| 373 | if (MI != modules.end()) |
| 374 | return MI->second.first; |
| 375 | |
| 376 | // Module hasn't been loaded yet, we need to load it |
| 377 | const char* modptr = base + fileOffset; |
| 378 | ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size()); |
| 379 | |
| 380 | // Now, load the bytecode module to get the ModuleProvider |
| 381 | ModuleProvider* mp = getBytecodeBufferModuleProvider( |
| 382 | (const unsigned char*) mbr->getData(), mbr->getSize(), |
| 383 | mbr->getPath().get(), 0); |
| 384 | |
| 385 | modules.insert(std::make_pair(fileOffset,std::make_pair(mp,mbr))); |
| 386 | |
| 387 | return mp; |
| 388 | } |
| 389 | |
| 390 | // Look up multiple symbols in the symbol table and return a set of |
| 391 | // ModuleProviders that define those symbols. |
| 392 | void |
| 393 | Archive::findModulesDefiningSymbols(const std::set<std::string>& symbols, |
| 394 | std::set<ModuleProvider*>& modules) |
| 395 | { |
| 396 | for (std::set<std::string>::const_iterator I=symbols.begin(), |
| 397 | E=symbols.end(); I != E; ++I) { |
| 398 | ModuleProvider* mp = findModuleDefiningSymbol(*I); |
| 399 | if (mp) { |
| 400 | modules.insert(mp); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 401 | } |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame^] | 402 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 403 | } |