Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 1 | //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 2 | // |
John Criswell | b576c94 | 2003-10-20 19:43:21 +0000 | [diff] [blame] | 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 5 | // This file was developed by Reid Spencer and is distributed under the |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 6 | // University of Illinois Open Source License. See LICENSE.TXT for details. |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 7 | // |
John Criswell | b576c94 | 2003-10-20 19:43:21 +0000 | [diff] [blame] | 8 | //===----------------------------------------------------------------------===// |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 9 | // |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 10 | // Reads standard unix archive files (.a) containing LLVM bytecode. |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 14 | #include "ArchiveInternals.h" |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 15 | #include "llvm/Bytecode/Reader.h" |
Duraid Madina | 8748746 | 2005-12-26 14:31:26 +0000 | [diff] [blame^] | 16 | #include <memory> |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 17 | |
Chris Lattner | 3446ae8 | 2004-01-10 19:00:15 +0000 | [diff] [blame] | 18 | using namespace llvm; |
Brian Gaeke | d0fde30 | 2003-11-11 22:41:34 +0000 | [diff] [blame] | 19 | |
/// Read a variable-bit-rate (VBR) encoded unsigned integer.
///
/// Each encoded byte contributes its low seven bits to the result, least
/// significant group first; the high bit (0x80) is set on every byte except
/// the last one of the encoding.
///
/// @param At   Cursor into the buffer; advanced past the bytes consumed.
/// @param End  One past the last readable byte of the buffer.
/// @returns the decoded value.
/// @throws std::string if the buffer is exhausted before the final byte is
///         seen, or if the encoding has more 7-bit groups than fit in an
///         unsigned.
inline unsigned readInteger(const char*&At, const char*End) {
  const unsigned MaxShift = sizeof(unsigned) * 8;
  unsigned Shift = 0;
  unsigned Result = 0;
  unsigned char Byte;

  do {
    if (At == End)
      throw std::string("Ran out of data reading vbr_uint!");
    // Shifting by the full width of the type (or more) is undefined
    // behaviour, so reject over-long encodings instead of decoding garbage.
    if (Shift >= MaxShift)
      throw std::string("vbr_uint is too large!");
    Byte = (unsigned char)*At++;
    Result |= (unsigned)(Byte & 0x7F) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return Result;
}
| 33 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 34 | // Completely parse the Archive's symbol table and populate symTab member var. |
| 35 | void |
| 36 | Archive::parseSymbolTable(const void* data, unsigned size) { |
| 37 | const char* At = (const char*) data; |
| 38 | const char* End = At + size; |
| 39 | while (At < End) { |
| 40 | unsigned offset = readInteger(At, End); |
| 41 | unsigned length = readInteger(At, End); |
| 42 | if (At + length > End) |
| 43 | throw std::string("malformed symbol table"); |
| 44 | // we don't care if it can't be inserted (duplicate entry) |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 45 | symTab.insert(std::make_pair(std::string(At, length), offset)); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 46 | At += length; |
| 47 | } |
| 48 | symTabSize = size; |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 49 | } |
| 50 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 51 | // This member parses an ArchiveMemberHeader that is presumed to be pointed to |
| 52 | // by At. The At pointer is updated to the byte just after the header, which |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 53 | // can be variable in size. |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 54 | ArchiveMember* |
| 55 | Archive::parseMemberHeader(const char*& At, const char* End) { |
| 56 | assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data"); |
| 57 | |
| 58 | // Cast archive member header |
| 59 | ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; |
| 60 | At += sizeof(ArchiveMemberHeader); |
| 61 | |
| 62 | // Instantiate the ArchiveMember to be filled |
| 63 | ArchiveMember* member = new ArchiveMember(this); |
| 64 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 65 | // Extract the size and determine if the file is |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 66 | // compressed or not (negative length). |
| 67 | int flags = 0; |
| 68 | int MemberSize = atoi(Hdr->size); |
| 69 | if (MemberSize < 0) { |
| 70 | flags |= ArchiveMember::CompressedFlag; |
| 71 | MemberSize = -MemberSize; |
| 72 | } |
| 73 | |
| 74 | // Check the size of the member for sanity |
| 75 | if (At + MemberSize > End) |
| 76 | throw std::string("invalid member length in archive file"); |
| 77 | |
| 78 | // Check the member signature |
| 79 | if (!Hdr->checkSignature()) |
| 80 | throw std::string("invalid file member signature"); |
| 81 | |
| 82 | // Convert and check the member name |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 83 | // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol |
| 84 | // table. The special name "//" and 14 blanks is for a string table, used |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 85 | // for long file names. This library doesn't generate either of those but |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 86 | // it will accept them. If the name starts with #1/ and the remainder is |
| 87 | // digits, then those digits specify the length of the name that is |
| 88 | // stored immediately following the header. The special name |
| 89 | // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode. |
| 90 | // Anything else is a regular, short filename that is terminated with |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 91 | // a '/' and blanks. |
| 92 | |
| 93 | std::string pathname; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 94 | switch (Hdr->name[0]) { |
| 95 | case '#': |
| 96 | if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { |
| 97 | if (isdigit(Hdr->name[3])) { |
| 98 | unsigned len = atoi(&Hdr->name[3]); |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 99 | pathname.assign(At, len); |
Reid Spencer | dd95e8d | 2004-11-17 16:13:11 +0000 | [diff] [blame] | 100 | At += len; |
| 101 | MemberSize -= len; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 102 | flags |= ArchiveMember::HasLongFilenameFlag; |
| 103 | } else |
| 104 | throw std::string("invalid long filename"); |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 105 | } else if (Hdr->name[1] == '_' && |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 106 | (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 107 | // The member is using a long file name (>15 chars) format. |
| 108 | // This format is standard for 4.4BSD and Mac OSX operating |
| 109 | // systems. LLVM uses it similarly. In this format, the |
| 110 | // remainder of the name field (after #1/) specifies the |
| 111 | // length of the file name which occupy the first bytes of |
| 112 | // the member's data. The pathname already has the #1/ stripped. |
| 113 | pathname.assign(ARFILE_LLVM_SYMTAB_NAME); |
| 114 | flags |= ArchiveMember::LLVMSymbolTableFlag; |
| 115 | } |
| 116 | break; |
| 117 | case '/': |
| 118 | if (Hdr->name[1]== '/') { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 119 | if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 120 | pathname.assign(ARFILE_STRTAB_NAME); |
| 121 | flags |= ArchiveMember::StringTableFlag; |
| 122 | } else { |
| 123 | throw std::string("invalid string table name"); |
| 124 | } |
| 125 | } else if (Hdr->name[1] == ' ') { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 126 | if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { |
| 127 | pathname.assign(ARFILE_SVR4_SYMTAB_NAME); |
| 128 | flags |= ArchiveMember::SVR4SymbolTableFlag; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 129 | } else { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 130 | throw std::string("invalid SVR4 symbol table name"); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 131 | } |
| 132 | } else if (isdigit(Hdr->name[1])) { |
| 133 | unsigned index = atoi(&Hdr->name[1]); |
| 134 | if (index < strtab.length()) { |
| 135 | const char* namep = strtab.c_str() + index; |
| 136 | const char* endp = strtab.c_str() + strtab.length(); |
| 137 | const char* p = namep; |
| 138 | const char* last_p = p; |
| 139 | while (p < endp) { |
| 140 | if (*p == '\n' && *last_p == '/') { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 141 | pathname.assign(namep, last_p - namep); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 142 | flags |= ArchiveMember::HasLongFilenameFlag; |
| 143 | break; |
| 144 | } |
| 145 | last_p = p; |
| 146 | p++; |
| 147 | } |
| 148 | if (p >= endp) |
| 149 | throw std::string("missing name termiantor in string table"); |
| 150 | } else { |
| 151 | throw std::string("name index beyond string table"); |
| 152 | } |
| 153 | } |
| 154 | break; |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 155 | case '_': |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 156 | if (Hdr->name[1] == '_' && |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 157 | (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { |
| 158 | pathname.assign(ARFILE_BSD4_SYMTAB_NAME); |
| 159 | flags |= ArchiveMember::BSD4SymbolTableFlag; |
Reid Spencer | 84b9ced | 2004-11-23 22:35:39 +0000 | [diff] [blame] | 160 | break; |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 161 | } |
Reid Spencer | 84b9ced | 2004-11-23 22:35:39 +0000 | [diff] [blame] | 162 | /* FALL THROUGH */ |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 163 | |
| 164 | default: |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 165 | char* slash = (char*) memchr(Hdr->name, '/', 16); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 166 | if (slash == 0) |
Reid Spencer | dd95e8d | 2004-11-17 16:13:11 +0000 | [diff] [blame] | 167 | slash = Hdr->name + 16; |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 168 | pathname.assign(Hdr->name, slash - Hdr->name); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 169 | break; |
| 170 | } |
| 171 | |
| 172 | // Determine if this is a bytecode file |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 173 | switch (sys::IdentifyFileType(At, 4)) { |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 174 | case sys::BytecodeFileType: |
| 175 | flags |= ArchiveMember::BytecodeFlag; |
| 176 | break; |
| 177 | case sys::CompressedBytecodeFileType: |
| 178 | flags |= ArchiveMember::CompressedBytecodeFlag; |
| 179 | flags &= ~ArchiveMember::CompressedFlag; |
| 180 | break; |
| 181 | default: |
| 182 | flags &= ~(ArchiveMember::BytecodeFlag| |
| 183 | ArchiveMember::CompressedBytecodeFlag); |
| 184 | break; |
| 185 | } |
| 186 | |
| 187 | // Fill in fields of the ArchiveMember |
| 188 | member->next = 0; |
| 189 | member->prev = 0; |
| 190 | member->parent = this; |
Reid Spencer | dd04df0 | 2005-07-07 23:21:43 +0000 | [diff] [blame] | 191 | member->path.set(pathname); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 192 | member->info.fileSize = MemberSize; |
| 193 | member->info.modTime.fromEpochTime(atoi(Hdr->date)); |
Reid Spencer | 5612762 | 2004-12-29 01:20:24 +0000 | [diff] [blame] | 194 | unsigned int mode; |
| 195 | sscanf(Hdr->mode, "%o", &mode); |
| 196 | member->info.mode = mode; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 197 | member->info.user = atoi(Hdr->uid); |
| 198 | member->info.group = atoi(Hdr->gid); |
| 199 | member->flags = flags; |
| 200 | member->data = At; |
| 201 | |
| 202 | return member; |
| 203 | } |
| 204 | |
| 205 | void |
| 206 | Archive::checkSignature() { |
| 207 | // Check the magic string at file's header |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 208 | if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8)) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 209 | throw std::string("invalid signature for an archive file"); |
| 210 | } |
| 211 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 212 | // This function loads the entire archive and fully populates its ilist with |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 213 | // the members of the archive file. This is typically used in preparation for |
| 214 | // editing the contents of the archive. |
| 215 | void |
| 216 | Archive::loadArchive() { |
| 217 | |
| 218 | // Set up parsing |
| 219 | members.clear(); |
| 220 | symTab.clear(); |
| 221 | const char *At = base; |
| 222 | const char *End = base + mapfile->size(); |
| 223 | |
| 224 | checkSignature(); |
| 225 | At += 8; // Skip the magic string. |
| 226 | |
| 227 | bool seenSymbolTable = false; |
| 228 | bool foundFirstFile = false; |
| 229 | while (At < End) { |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 230 | // parse the member header |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 231 | const char* Save = At; |
| 232 | ArchiveMember* mbr = parseMemberHeader(At, End); |
| 233 | |
| 234 | // check if this is the foreign symbol table |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 235 | if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 236 | // We just save this but don't do anything special |
| 237 | // with it. It doesn't count as the "first file". |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 238 | if (foreignST) { |
| 239 | // What? Multiple foreign symbol tables? Just chuck it |
| 240 | // and retain the last one found. |
| 241 | delete foreignST; |
| 242 | } |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 243 | foreignST = mbr; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 244 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 245 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 246 | At++; |
| 247 | } else if (mbr->isStringTable()) { |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 248 | // Simply suck the entire string table into a string |
| 249 | // variable. This will be used to get the names of the |
| 250 | // members that use the "/ddd" format for their names |
| 251 | // (SVR4 style long names). |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 252 | strtab.assign(At, mbr->getSize()); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 253 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 254 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 255 | At++; |
| 256 | delete mbr; |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 257 | } else if (mbr->isLLVMSymbolTable()) { |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 258 | // This is the LLVM symbol table for the archive. If we've seen it |
| 259 | // already, its an error. Otherwise, parse the symbol table and move on. |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 260 | if (seenSymbolTable) |
| 261 | throw std::string("invalid archive: multiple symbol tables"); |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 262 | parseSymbolTable(mbr->getData(), mbr->getSize()); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 263 | seenSymbolTable = true; |
| 264 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 265 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 266 | At++; |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 267 | delete mbr; // We don't need this member in the list of members. |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 268 | } else { |
Reid Spencer | 4a980d1 | 2004-11-16 06:47:19 +0000 | [diff] [blame] | 269 | // This is just a regular file. If its the first one, save its offset. |
| 270 | // Otherwise just push it on the list and move on to the next file. |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 271 | if (!foundFirstFile) { |
| 272 | firstFileOffset = Save - base; |
| 273 | foundFirstFile = true; |
| 274 | } |
| 275 | members.push_back(mbr); |
| 276 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 277 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 278 | At++; |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | // Open and completely load the archive file. |
| 284 | Archive* |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 285 | Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) { |
| 286 | try { |
Reid Spencer | 518ec2e | 2004-12-13 03:22:31 +0000 | [diff] [blame] | 287 | std::auto_ptr<Archive> result ( new Archive(file, true)); |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 288 | result->loadArchive(); |
Reid Spencer | 518ec2e | 2004-12-13 03:22:31 +0000 | [diff] [blame] | 289 | return result.release(); |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 290 | } catch (const std::string& msg) { |
| 291 | if (ErrorMessage) { |
| 292 | *ErrorMessage = msg; |
| 293 | } |
| 294 | return 0; |
| 295 | } |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 296 | } |
| 297 | |
| 298 | // Get all the bytecode modules from the archive |
| 299 | bool |
| 300 | Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { |
| 301 | |
| 302 | for (iterator I=begin(), E=end(); I != E; ++I) { |
| 303 | if (I->isBytecode() || I->isCompressedBytecode()) { |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 304 | std::string FullMemberName = archPath.toString() + |
Reid Spencer | 1fce091 | 2004-12-11 00:14:15 +0000 | [diff] [blame] | 305 | "(" + I->getPath().toString() + ")"; |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 306 | Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), |
Reid Spencer | 6ab7a4f | 2004-11-17 18:25:21 +0000 | [diff] [blame] | 307 | I->getSize(), FullMemberName, ErrMessage); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 308 | if (!M) |
| 309 | return true; |
| 310 | |
| 311 | Modules.push_back(M); |
| 312 | } |
| 313 | } |
Brian Gaeke | 2c61d7b | 2003-11-16 23:08:48 +0000 | [diff] [blame] | 314 | return false; |
| 315 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 316 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 317 | // Load just the symbol table from the archive file |
| 318 | void |
| 319 | Archive::loadSymbolTable() { |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 320 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 321 | // Set up parsing |
| 322 | members.clear(); |
| 323 | symTab.clear(); |
| 324 | const char *At = base; |
| 325 | const char *End = base + mapfile->size(); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 326 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 327 | // Make sure we're dealing with an archive |
| 328 | checkSignature(); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 329 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 330 | At += 8; // Skip signature |
| 331 | |
| 332 | // Parse the first file member header |
| 333 | const char* FirstFile = At; |
| 334 | ArchiveMember* mbr = parseMemberHeader(At, End); |
| 335 | |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 336 | if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 337 | // Skip the foreign symbol table, we don't do anything with it |
| 338 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 339 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 340 | At++; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 341 | delete mbr; |
| 342 | |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 343 | // Read the next one |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 344 | FirstFile = At; |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 345 | mbr = parseMemberHeader(At, End); |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 346 | } |
| 347 | |
| 348 | if (mbr->isStringTable()) { |
| 349 | // Process the string table entry |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 350 | strtab.assign((const char*)mbr->getData(), mbr->getSize()); |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 351 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 352 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 353 | At++; |
| 354 | delete mbr; |
| 355 | // Get the next one |
| 356 | FirstFile = At; |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 357 | mbr = parseMemberHeader(At, End); |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 358 | } |
| 359 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 360 | // See if its the symbol table |
| 361 | if (mbr->isLLVMSymbolTable()) { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 362 | parseSymbolTable(mbr->getData(), mbr->getSize()); |
Reid Spencer | 8dde18f | 2004-11-28 03:13:02 +0000 | [diff] [blame] | 363 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 364 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | 8dde18f | 2004-11-28 03:13:02 +0000 | [diff] [blame] | 365 | At++; |
| 366 | FirstFile = At; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 367 | } else { |
| 368 | // There's no symbol table in the file. We have to rebuild it from scratch |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 369 | // because the intent of this method is to get the symbol table loaded so |
| 370 | // it can be searched efficiently. |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 371 | // Add the member to the members list |
| 372 | members.push_back(mbr); |
| 373 | } |
| 374 | |
| 375 | firstFileOffset = FirstFile - base; |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 376 | } |
| 377 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 378 | // Open the archive and load just the symbol tables |
| 379 | Archive* |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 380 | Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { |
| 381 | try { |
Reid Spencer | 518ec2e | 2004-12-13 03:22:31 +0000 | [diff] [blame] | 382 | std::auto_ptr<Archive> result ( new Archive(file, true) ); |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 383 | result->loadSymbolTable(); |
Reid Spencer | 518ec2e | 2004-12-13 03:22:31 +0000 | [diff] [blame] | 384 | return result.release(); |
Reid Spencer | 5af4688 | 2004-12-13 02:59:03 +0000 | [diff] [blame] | 385 | } catch (const std::string& msg) { |
| 386 | if (ErrorMessage) { |
| 387 | *ErrorMessage = msg; |
| 388 | } |
| 389 | return 0; |
| 390 | } |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 391 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 392 | |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 393 | // Look up one symbol in the symbol table and return a ModuleProvider for the |
| 394 | // module that defines that symbol. |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 395 | ModuleProvider* |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 396 | Archive::findModuleDefiningSymbol(const std::string& symbol) { |
| 397 | SymTabType::iterator SI = symTab.find(symbol); |
| 398 | if (SI == symTab.end()) |
| 399 | return 0; |
| 400 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 401 | // The symbol table was previously constructed assuming that the members were |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 402 | // written without the symbol table header. Because VBR encoding is used, the |
| 403 | // values could not be adjusted to account for the offset of the symbol table |
| 404 | // because that could affect the size of the symbol table due to VBR encoding. |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 405 | // We now have to account for this by adjusting the offset by the size of the |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 406 | // symbol table and its header. |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 407 | unsigned fileOffset = |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 408 | SI->second + // offset in symbol-table-less file |
| 409 | firstFileOffset; // add offset to first "real" file in archive |
| 410 | |
| 411 | // See if the module is already loaded |
| 412 | ModuleMap::iterator MI = modules.find(fileOffset); |
| 413 | if (MI != modules.end()) |
| 414 | return MI->second.first; |
| 415 | |
| 416 | // Module hasn't been loaded yet, we need to load it |
| 417 | const char* modptr = base + fileOffset; |
| 418 | ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size()); |
| 419 | |
| 420 | // Now, load the bytecode module to get the ModuleProvider |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 421 | std::string FullMemberName = archPath.toString() + "(" + |
Reid Spencer | 1fce091 | 2004-12-11 00:14:15 +0000 | [diff] [blame] | 422 | mbr->getPath().toString() + ")"; |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 423 | ModuleProvider* mp = getBytecodeBufferModuleProvider( |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 424 | (const unsigned char*) mbr->getData(), mbr->getSize(), |
Reid Spencer | 6ab7a4f | 2004-11-17 18:25:21 +0000 | [diff] [blame] | 425 | FullMemberName, 0); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 426 | |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 427 | modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 428 | |
| 429 | return mp; |
| 430 | } |
| 431 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 432 | // Look up multiple symbols in the symbol table and return a set of |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 433 | // ModuleProviders that define those symbols. |
| 434 | void |
Reid Spencer | 7783e8a | 2004-11-19 03:18:22 +0000 | [diff] [blame] | 435 | Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 436 | std::set<ModuleProvider*>& result) |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 437 | { |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 438 | assert(mapfile && base && "Can't findModulesDefiningSymbols on new archive"); |
| 439 | if (symTab.empty()) { |
| 440 | // We don't have a symbol table, so we must build it now but lets also |
| 441 | // make sure that we populate the modules table as we do this to ensure |
| 442 | // that we don't load them twice when findModuleDefiningSymbol is called |
| 443 | // below. |
| 444 | |
| 445 | // Get a pointer to the first file |
| 446 | const char* At = ((const char*)base) + firstFileOffset; |
| 447 | const char* End = ((const char*)base) + mapfile->size(); |
| 448 | |
| 449 | while ( At < End) { |
| 450 | // Compute the offset to be put in the symbol table |
| 451 | unsigned offset = At - base - firstFileOffset; |
| 452 | |
| 453 | // Parse the file's header |
| 454 | ArchiveMember* mbr = parseMemberHeader(At, End); |
| 455 | |
| 456 | // If it contains symbols |
| 457 | if (mbr->isBytecode() || mbr->isCompressedBytecode()) { |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 458 | // Get the symbols |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 459 | std::vector<std::string> symbols; |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 460 | std::string FullMemberName = archPath.toString() + "(" + |
Reid Spencer | 1fce091 | 2004-12-11 00:14:15 +0000 | [diff] [blame] | 461 | mbr->getPath().toString() + ")"; |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 462 | ModuleProvider* MP = GetBytecodeSymbols((const unsigned char*)At, |
Reid Spencer | 6ab7a4f | 2004-11-17 18:25:21 +0000 | [diff] [blame] | 463 | mbr->getSize(), FullMemberName, symbols); |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 464 | |
| 465 | if (MP) { |
| 466 | // Insert the module's symbols into the symbol table |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 467 | for (std::vector<std::string>::iterator I = symbols.begin(), |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 468 | E=symbols.end(); I != E; ++I ) { |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 469 | symTab.insert(std::make_pair(*I, offset)); |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 470 | } |
| 471 | // Insert the ModuleProvider and the ArchiveMember into the table of |
| 472 | // modules. |
Reid Spencer | 9a29db4 | 2004-11-20 07:29:40 +0000 | [diff] [blame] | 473 | modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 474 | } else { |
| 475 | throw std::string("Can't parse bytecode member: ") + |
Reid Spencer | 1fce091 | 2004-12-11 00:14:15 +0000 | [diff] [blame] | 476 | mbr->getPath().toString(); |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 477 | } |
| 478 | } |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 479 | |
| 480 | // Go to the next file location |
| 481 | At += mbr->getSize(); |
Reid Spencer | 6405c9e | 2004-11-19 17:08:00 +0000 | [diff] [blame] | 482 | if ((intptr_t(At) & 1) == 1) |
Reid Spencer | b323113 | 2004-11-15 01:40:20 +0000 | [diff] [blame] | 483 | At++; |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 484 | } |
| 485 | } |
| 486 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 487 | // At this point we have a valid symbol table (one way or another) so we |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 488 | // just use it to quickly find the symbols requested. |
| 489 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 490 | for (std::set<std::string>::iterator I=symbols.begin(), |
Reid Spencer | 7783e8a | 2004-11-19 03:18:22 +0000 | [diff] [blame] | 491 | E=symbols.end(); I != E;) { |
| 492 | // See if this symbol exists |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 493 | ModuleProvider* mp = findModuleDefiningSymbol(*I); |
| 494 | if (mp) { |
Reid Spencer | 7783e8a | 2004-11-19 03:18:22 +0000 | [diff] [blame] | 495 | // The symbol exists, insert the ModuleProvider into our result, |
| 496 | // duplicates wil be ignored |
Reid Spencer | 766b793 | 2004-11-15 01:20:11 +0000 | [diff] [blame] | 497 | result.insert(mp); |
Reid Spencer | 7783e8a | 2004-11-19 03:18:22 +0000 | [diff] [blame] | 498 | |
Misha Brukman | 2b37d7c | 2005-04-21 21:13:18 +0000 | [diff] [blame] | 499 | // Remove the symbol now that its been resolved, being careful to |
Reid Spencer | 57646ec | 2004-11-19 03:44:10 +0000 | [diff] [blame] | 500 | // post-increment the iterator. |
| 501 | symbols.erase(I++); |
Reid Spencer | 7783e8a | 2004-11-19 03:18:22 +0000 | [diff] [blame] | 502 | } else { |
| 503 | ++I; |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 504 | } |
Reid Spencer | f9d7a51 | 2004-11-14 21:58:33 +0000 | [diff] [blame] | 505 | } |
Chris Lattner | 968cfd0 | 2003-04-19 21:45:34 +0000 | [diff] [blame] | 506 | } |
Reid Spencer | eaa06bb | 2005-02-26 22:00:32 +0000 | [diff] [blame] | 507 | |
Chris Lattner | 7c43992 | 2005-09-23 06:22:58 +0000 | [diff] [blame] | 508 | bool Archive::isBytecodeArchive() { |
| 509 | // Make sure the symTab has been loaded. In most cases this should have been |
| 510 | // done when the archive was constructed, but still, this is just in case. |
| 511 | if (!symTab.size()) |
Reid Spencer | eaa06bb | 2005-02-26 22:00:32 +0000 | [diff] [blame] | 512 | loadSymbolTable(); |
| 513 | |
Chris Lattner | 7c43992 | 2005-09-23 06:22:58 +0000 | [diff] [blame] | 514 | // Now that we know it's been loaded, return true |
| 515 | // if it has a size |
| 516 | if (symTab.size()) return true; |
Reid Spencer | eaa06bb | 2005-02-26 22:00:32 +0000 | [diff] [blame] | 517 | |
| 518 | //We still can't be sure it isn't a bytecode archive |
| 519 | loadArchive(); |
| 520 | |
| 521 | std::vector<Module *> Modules; |
| 522 | std::string ErrorMessage; |
| 523 | |
Chris Lattner | 7c43992 | 2005-09-23 06:22:58 +0000 | [diff] [blame] | 524 | // Scan the archive, trying to load a bytecode member. We only load one to |
| 525 | // see if this works. |
| 526 | for (iterator I = begin(), E = end(); I != E; ++I) { |
| 527 | if (!I->isBytecode() && !I->isCompressedBytecode()) |
| 528 | continue; |
| 529 | |
| 530 | std::string FullMemberName = |
| 531 | archPath.toString() + "(" + I->getPath().toString() + ")"; |
| 532 | Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), |
| 533 | I->getSize(), FullMemberName); |
| 534 | if (!M) |
| 535 | return false; // Couldn't parse bytecode, not a bytecode archive. |
| 536 | delete M; |
| 537 | return true; |
| 538 | } |
| 539 | |
| 540 | return false; |
Reid Spencer | eaa06bb | 2005-02-26 22:00:32 +0000 | [diff] [blame] | 541 | } |