blob: ff8c9bcb0321c3a7a8e551c89e6407d239462f21 [file] [log] [blame]
//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  Reads standard unix archive files (.a) containing LLVM bytecode.
//
//===----------------------------------------------------------------------===//
13
Reid Spencerf9d7a512004-11-14 21:58:33 +000014#include "ArchiveInternals.h"
Chris Lattner968cfd02003-04-19 21:45:34 +000015#include "llvm/Bytecode/Reader.h"
Reid Spencerf9d7a512004-11-14 21:58:33 +000016
Chris Lattner3446ae82004-01-10 19:00:15 +000017using namespace llvm;
Brian Gaeked0fde302003-11-11 22:41:34 +000018
/// Read a variable-bit-rate encoded unsigned integer.
///
/// Each byte contributes its low seven bits, least significant group first;
/// a set high bit (0x80) means another byte follows. At is advanced past the
/// bytes that were consumed.
///
/// Throws std::string if End is reached before the terminating byte, or if
/// the encoding is longer than an unsigned can hold.
inline unsigned readInteger(const char*&At, const char*End) {
  const unsigned MaxBits = sizeof(unsigned) * 8;
  unsigned Shift = 0;
  unsigned Result = 0;

  do {
    if (At == End)
      throw std::string("Ran out of data reading vbr_uint!");
    // Shifting by the full width of the type (or more) is undefined, so an
    // over-long encoding is rejected instead of being decoded.
    if (Shift >= MaxBits)
      throw std::string("vbr_uint is too large!");
    Result |= (unsigned)((*At++) & 0x7F) << Shift;
    Shift += 7;
  } while (At[-1] & 0x80);
  return Result;
}
32
Reid Spencerf9d7a512004-11-14 21:58:33 +000033// Completely parse the Archive's symbol table and populate symTab member var.
34void
35Archive::parseSymbolTable(const void* data, unsigned size) {
36 const char* At = (const char*) data;
37 const char* End = At + size;
38 while (At < End) {
39 unsigned offset = readInteger(At, End);
40 unsigned length = readInteger(At, End);
41 if (At + length > End)
42 throw std::string("malformed symbol table");
43 // we don't care if it can't be inserted (duplicate entry)
Reid Spencer9a29db42004-11-20 07:29:40 +000044 symTab.insert(std::make_pair(std::string(At, length), offset));
Reid Spencerf9d7a512004-11-14 21:58:33 +000045 At += length;
46 }
47 symTabSize = size;
Chris Lattner968cfd02003-04-19 21:45:34 +000048}
49
Reid Spencerf9d7a512004-11-14 21:58:33 +000050// This member parses an ArchiveMemberHeader that is presumed to be pointed to
51// by At. The At pointer is updated to the byte just after the header, which
Misha Brukman2b37d7c2005-04-21 21:13:18 +000052// can be variable in size.
Reid Spencerf9d7a512004-11-14 21:58:33 +000053ArchiveMember*
54Archive::parseMemberHeader(const char*& At, const char* End) {
55 assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data");
56
57 // Cast archive member header
58 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
59 At += sizeof(ArchiveMemberHeader);
60
61 // Instantiate the ArchiveMember to be filled
62 ArchiveMember* member = new ArchiveMember(this);
63
Misha Brukman2b37d7c2005-04-21 21:13:18 +000064 // Extract the size and determine if the file is
Reid Spencerf9d7a512004-11-14 21:58:33 +000065 // compressed or not (negative length).
66 int flags = 0;
67 int MemberSize = atoi(Hdr->size);
68 if (MemberSize < 0) {
69 flags |= ArchiveMember::CompressedFlag;
70 MemberSize = -MemberSize;
71 }
72
73 // Check the size of the member for sanity
74 if (At + MemberSize > End)
75 throw std::string("invalid member length in archive file");
76
77 // Check the member signature
78 if (!Hdr->checkSignature())
79 throw std::string("invalid file member signature");
80
81 // Convert and check the member name
Misha Brukman2b37d7c2005-04-21 21:13:18 +000082 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
83 // table. The special name "//" and 14 blanks is for a string table, used
Reid Spencerf9d7a512004-11-14 21:58:33 +000084 // for long file names. This library doesn't generate either of those but
Misha Brukman2b37d7c2005-04-21 21:13:18 +000085 // it will accept them. If the name starts with #1/ and the remainder is
86 // digits, then those digits specify the length of the name that is
87 // stored immediately following the header. The special name
88 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode.
89 // Anything else is a regular, short filename that is terminated with
Reid Spencerf9d7a512004-11-14 21:58:33 +000090 // a '/' and blanks.
91
92 std::string pathname;
Reid Spencerf9d7a512004-11-14 21:58:33 +000093 switch (Hdr->name[0]) {
94 case '#':
95 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
96 if (isdigit(Hdr->name[3])) {
97 unsigned len = atoi(&Hdr->name[3]);
Reid Spencer9a29db42004-11-20 07:29:40 +000098 pathname.assign(At, len);
Reid Spencerdd95e8d2004-11-17 16:13:11 +000099 At += len;
100 MemberSize -= len;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000101 flags |= ArchiveMember::HasLongFilenameFlag;
102 } else
103 throw std::string("invalid long filename");
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000104 } else if (Hdr->name[1] == '_' &&
Reid Spencer9a29db42004-11-20 07:29:40 +0000105 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
Reid Spencerf9d7a512004-11-14 21:58:33 +0000106 // The member is using a long file name (>15 chars) format.
107 // This format is standard for 4.4BSD and Mac OSX operating
108 // systems. LLVM uses it similarly. In this format, the
109 // remainder of the name field (after #1/) specifies the
110 // length of the file name which occupy the first bytes of
111 // the member's data. The pathname already has the #1/ stripped.
112 pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
113 flags |= ArchiveMember::LLVMSymbolTableFlag;
114 }
115 break;
116 case '/':
117 if (Hdr->name[1]== '/') {
Reid Spencer9a29db42004-11-20 07:29:40 +0000118 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
Reid Spencerf9d7a512004-11-14 21:58:33 +0000119 pathname.assign(ARFILE_STRTAB_NAME);
120 flags |= ArchiveMember::StringTableFlag;
121 } else {
122 throw std::string("invalid string table name");
123 }
124 } else if (Hdr->name[1] == ' ') {
Reid Spencer9a29db42004-11-20 07:29:40 +0000125 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
126 pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
127 flags |= ArchiveMember::SVR4SymbolTableFlag;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000128 } else {
Reid Spencer9a29db42004-11-20 07:29:40 +0000129 throw std::string("invalid SVR4 symbol table name");
Reid Spencerf9d7a512004-11-14 21:58:33 +0000130 }
131 } else if (isdigit(Hdr->name[1])) {
132 unsigned index = atoi(&Hdr->name[1]);
133 if (index < strtab.length()) {
134 const char* namep = strtab.c_str() + index;
135 const char* endp = strtab.c_str() + strtab.length();
136 const char* p = namep;
137 const char* last_p = p;
138 while (p < endp) {
139 if (*p == '\n' && *last_p == '/') {
Reid Spencer9a29db42004-11-20 07:29:40 +0000140 pathname.assign(namep, last_p - namep);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000141 flags |= ArchiveMember::HasLongFilenameFlag;
142 break;
143 }
144 last_p = p;
145 p++;
146 }
147 if (p >= endp)
148 throw std::string("missing name termiantor in string table");
149 } else {
150 throw std::string("name index beyond string table");
151 }
152 }
153 break;
Reid Spencer9a29db42004-11-20 07:29:40 +0000154 case '_':
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000155 if (Hdr->name[1] == '_' &&
Reid Spencer9a29db42004-11-20 07:29:40 +0000156 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
157 pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
158 flags |= ArchiveMember::BSD4SymbolTableFlag;
Reid Spencer84b9ced2004-11-23 22:35:39 +0000159 break;
Reid Spencer9a29db42004-11-20 07:29:40 +0000160 }
Reid Spencer84b9ced2004-11-23 22:35:39 +0000161 /* FALL THROUGH */
Reid Spencerf9d7a512004-11-14 21:58:33 +0000162
163 default:
Reid Spencer9a29db42004-11-20 07:29:40 +0000164 char* slash = (char*) memchr(Hdr->name, '/', 16);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000165 if (slash == 0)
Reid Spencerdd95e8d2004-11-17 16:13:11 +0000166 slash = Hdr->name + 16;
Reid Spencer9a29db42004-11-20 07:29:40 +0000167 pathname.assign(Hdr->name, slash - Hdr->name);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000168 break;
169 }
170
171 // Determine if this is a bytecode file
Reid Spencer9a29db42004-11-20 07:29:40 +0000172 switch (sys::IdentifyFileType(At, 4)) {
Reid Spencerf9d7a512004-11-14 21:58:33 +0000173 case sys::BytecodeFileType:
174 flags |= ArchiveMember::BytecodeFlag;
175 break;
176 case sys::CompressedBytecodeFileType:
177 flags |= ArchiveMember::CompressedBytecodeFlag;
178 flags &= ~ArchiveMember::CompressedFlag;
179 break;
180 default:
181 flags &= ~(ArchiveMember::BytecodeFlag|
182 ArchiveMember::CompressedBytecodeFlag);
183 break;
184 }
185
186 // Fill in fields of the ArchiveMember
187 member->next = 0;
188 member->prev = 0;
189 member->parent = this;
Reid Spencerdd04df02005-07-07 23:21:43 +0000190 member->path.set(pathname);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000191 member->info.fileSize = MemberSize;
192 member->info.modTime.fromEpochTime(atoi(Hdr->date));
Reid Spencer56127622004-12-29 01:20:24 +0000193 unsigned int mode;
194 sscanf(Hdr->mode, "%o", &mode);
195 member->info.mode = mode;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000196 member->info.user = atoi(Hdr->uid);
197 member->info.group = atoi(Hdr->gid);
198 member->flags = flags;
199 member->data = At;
200
201 return member;
202}
203
204void
205Archive::checkSignature() {
206 // Check the magic string at file's header
Reid Spencer9a29db42004-11-20 07:29:40 +0000207 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8))
Reid Spencerf9d7a512004-11-14 21:58:33 +0000208 throw std::string("invalid signature for an archive file");
209}
210
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000211// This function loads the entire archive and fully populates its ilist with
Reid Spencerf9d7a512004-11-14 21:58:33 +0000212// the members of the archive file. This is typically used in preparation for
213// editing the contents of the archive.
214void
215Archive::loadArchive() {
216
217 // Set up parsing
218 members.clear();
219 symTab.clear();
220 const char *At = base;
221 const char *End = base + mapfile->size();
222
223 checkSignature();
224 At += 8; // Skip the magic string.
225
226 bool seenSymbolTable = false;
227 bool foundFirstFile = false;
228 while (At < End) {
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000229 // parse the member header
Reid Spencerf9d7a512004-11-14 21:58:33 +0000230 const char* Save = At;
231 ArchiveMember* mbr = parseMemberHeader(At, End);
232
233 // check if this is the foreign symbol table
Reid Spencer9a29db42004-11-20 07:29:40 +0000234 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
Reid Spencer4a980d12004-11-16 06:47:19 +0000235 // We just save this but don't do anything special
236 // with it. It doesn't count as the "first file".
Reid Spencer9a29db42004-11-20 07:29:40 +0000237 if (foreignST) {
238 // What? Multiple foreign symbol tables? Just chuck it
239 // and retain the last one found.
240 delete foreignST;
241 }
Reid Spencer4a980d12004-11-16 06:47:19 +0000242 foreignST = mbr;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000243 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000244 if ((intptr_t(At) & 1) == 1)
Reid Spencerf9d7a512004-11-14 21:58:33 +0000245 At++;
246 } else if (mbr->isStringTable()) {
Reid Spencer4a980d12004-11-16 06:47:19 +0000247 // Simply suck the entire string table into a string
248 // variable. This will be used to get the names of the
249 // members that use the "/ddd" format for their names
250 // (SVR4 style long names).
Reid Spencer9a29db42004-11-20 07:29:40 +0000251 strtab.assign(At, mbr->getSize());
Reid Spencerf9d7a512004-11-14 21:58:33 +0000252 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000253 if ((intptr_t(At) & 1) == 1)
Reid Spencerf9d7a512004-11-14 21:58:33 +0000254 At++;
255 delete mbr;
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000256 } else if (mbr->isLLVMSymbolTable()) {
Reid Spencer4a980d12004-11-16 06:47:19 +0000257 // This is the LLVM symbol table for the archive. If we've seen it
258 // already, its an error. Otherwise, parse the symbol table and move on.
Reid Spencerf9d7a512004-11-14 21:58:33 +0000259 if (seenSymbolTable)
260 throw std::string("invalid archive: multiple symbol tables");
Reid Spencer9a29db42004-11-20 07:29:40 +0000261 parseSymbolTable(mbr->getData(), mbr->getSize());
Reid Spencerf9d7a512004-11-14 21:58:33 +0000262 seenSymbolTable = true;
263 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000264 if ((intptr_t(At) & 1) == 1)
Reid Spencerf9d7a512004-11-14 21:58:33 +0000265 At++;
Reid Spencer4a980d12004-11-16 06:47:19 +0000266 delete mbr; // We don't need this member in the list of members.
Reid Spencerf9d7a512004-11-14 21:58:33 +0000267 } else {
Reid Spencer4a980d12004-11-16 06:47:19 +0000268 // This is just a regular file. If its the first one, save its offset.
269 // Otherwise just push it on the list and move on to the next file.
Reid Spencerf9d7a512004-11-14 21:58:33 +0000270 if (!foundFirstFile) {
271 firstFileOffset = Save - base;
272 foundFirstFile = true;
273 }
274 members.push_back(mbr);
275 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000276 if ((intptr_t(At) & 1) == 1)
Reid Spencerf9d7a512004-11-14 21:58:33 +0000277 At++;
278 }
279 }
280}
281
282// Open and completely load the archive file.
283Archive*
Reid Spencer5af46882004-12-13 02:59:03 +0000284Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) {
285 try {
Reid Spencer518ec2e2004-12-13 03:22:31 +0000286 std::auto_ptr<Archive> result ( new Archive(file, true));
Reid Spencer5af46882004-12-13 02:59:03 +0000287 result->loadArchive();
Reid Spencer518ec2e2004-12-13 03:22:31 +0000288 return result.release();
Reid Spencer5af46882004-12-13 02:59:03 +0000289 } catch (const std::string& msg) {
290 if (ErrorMessage) {
291 *ErrorMessage = msg;
292 }
293 return 0;
294 }
Reid Spencerf9d7a512004-11-14 21:58:33 +0000295}
296
297// Get all the bytecode modules from the archive
298bool
299Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) {
300
301 for (iterator I=begin(), E=end(); I != E; ++I) {
302 if (I->isBytecode() || I->isCompressedBytecode()) {
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000303 std::string FullMemberName = archPath.toString() +
Reid Spencer1fce0912004-12-11 00:14:15 +0000304 "(" + I->getPath().toString() + ")";
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000305 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(),
Reid Spencer6ab7a4f2004-11-17 18:25:21 +0000306 I->getSize(), FullMemberName, ErrMessage);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000307 if (!M)
308 return true;
309
310 Modules.push_back(M);
311 }
312 }
Brian Gaeke2c61d7b2003-11-16 23:08:48 +0000313 return false;
314}
Chris Lattner968cfd02003-04-19 21:45:34 +0000315
Reid Spencerf9d7a512004-11-14 21:58:33 +0000316// Load just the symbol table from the archive file
317void
318Archive::loadSymbolTable() {
Chris Lattner968cfd02003-04-19 21:45:34 +0000319
Reid Spencerf9d7a512004-11-14 21:58:33 +0000320 // Set up parsing
321 members.clear();
322 symTab.clear();
323 const char *At = base;
324 const char *End = base + mapfile->size();
Chris Lattner968cfd02003-04-19 21:45:34 +0000325
Reid Spencerf9d7a512004-11-14 21:58:33 +0000326 // Make sure we're dealing with an archive
327 checkSignature();
Chris Lattner968cfd02003-04-19 21:45:34 +0000328
Reid Spencerf9d7a512004-11-14 21:58:33 +0000329 At += 8; // Skip signature
330
331 // Parse the first file member header
332 const char* FirstFile = At;
333 ArchiveMember* mbr = parseMemberHeader(At, End);
334
Reid Spencer9a29db42004-11-20 07:29:40 +0000335 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
Reid Spencerf9d7a512004-11-14 21:58:33 +0000336 // Skip the foreign symbol table, we don't do anything with it
337 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000338 if ((intptr_t(At) & 1) == 1)
Reid Spencerb3231132004-11-15 01:40:20 +0000339 At++;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000340 delete mbr;
341
Reid Spencerb3231132004-11-15 01:40:20 +0000342 // Read the next one
Reid Spencerf9d7a512004-11-14 21:58:33 +0000343 FirstFile = At;
Reid Spencer9a29db42004-11-20 07:29:40 +0000344 mbr = parseMemberHeader(At, End);
Reid Spencerb3231132004-11-15 01:40:20 +0000345 }
346
347 if (mbr->isStringTable()) {
348 // Process the string table entry
Reid Spencer9a29db42004-11-20 07:29:40 +0000349 strtab.assign((const char*)mbr->getData(), mbr->getSize());
Reid Spencerb3231132004-11-15 01:40:20 +0000350 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000351 if ((intptr_t(At) & 1) == 1)
Reid Spencerb3231132004-11-15 01:40:20 +0000352 At++;
353 delete mbr;
354 // Get the next one
355 FirstFile = At;
Reid Spencer9a29db42004-11-20 07:29:40 +0000356 mbr = parseMemberHeader(At, End);
Chris Lattner968cfd02003-04-19 21:45:34 +0000357 }
358
Reid Spencerf9d7a512004-11-14 21:58:33 +0000359 // See if its the symbol table
360 if (mbr->isLLVMSymbolTable()) {
Reid Spencer9a29db42004-11-20 07:29:40 +0000361 parseSymbolTable(mbr->getData(), mbr->getSize());
Reid Spencer8dde18f2004-11-28 03:13:02 +0000362 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000363 if ((intptr_t(At) & 1) == 1)
Reid Spencer8dde18f2004-11-28 03:13:02 +0000364 At++;
365 FirstFile = At;
Reid Spencerf9d7a512004-11-14 21:58:33 +0000366 } else {
367 // There's no symbol table in the file. We have to rebuild it from scratch
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000368 // because the intent of this method is to get the symbol table loaded so
369 // it can be searched efficiently.
Reid Spencerf9d7a512004-11-14 21:58:33 +0000370 // Add the member to the members list
371 members.push_back(mbr);
372 }
373
374 firstFileOffset = FirstFile - base;
Chris Lattner968cfd02003-04-19 21:45:34 +0000375}
376
Reid Spencerf9d7a512004-11-14 21:58:33 +0000377// Open the archive and load just the symbol tables
378Archive*
Reid Spencer5af46882004-12-13 02:59:03 +0000379Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) {
380 try {
Reid Spencer518ec2e2004-12-13 03:22:31 +0000381 std::auto_ptr<Archive> result ( new Archive(file, true) );
Reid Spencer5af46882004-12-13 02:59:03 +0000382 result->loadSymbolTable();
Reid Spencer518ec2e2004-12-13 03:22:31 +0000383 return result.release();
Reid Spencer5af46882004-12-13 02:59:03 +0000384 } catch (const std::string& msg) {
385 if (ErrorMessage) {
386 *ErrorMessage = msg;
387 }
388 return 0;
389 }
Reid Spencerf9d7a512004-11-14 21:58:33 +0000390}
Chris Lattner968cfd02003-04-19 21:45:34 +0000391
Reid Spencerf9d7a512004-11-14 21:58:33 +0000392// Look up one symbol in the symbol table and return a ModuleProvider for the
393// module that defines that symbol.
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000394ModuleProvider*
Reid Spencerf9d7a512004-11-14 21:58:33 +0000395Archive::findModuleDefiningSymbol(const std::string& symbol) {
396 SymTabType::iterator SI = symTab.find(symbol);
397 if (SI == symTab.end())
398 return 0;
399
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000400 // The symbol table was previously constructed assuming that the members were
Reid Spencerf9d7a512004-11-14 21:58:33 +0000401 // written without the symbol table header. Because VBR encoding is used, the
402 // values could not be adjusted to account for the offset of the symbol table
403 // because that could affect the size of the symbol table due to VBR encoding.
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000404 // We now have to account for this by adjusting the offset by the size of the
Reid Spencerf9d7a512004-11-14 21:58:33 +0000405 // symbol table and its header.
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000406 unsigned fileOffset =
Reid Spencerf9d7a512004-11-14 21:58:33 +0000407 SI->second + // offset in symbol-table-less file
408 firstFileOffset; // add offset to first "real" file in archive
409
410 // See if the module is already loaded
411 ModuleMap::iterator MI = modules.find(fileOffset);
412 if (MI != modules.end())
413 return MI->second.first;
414
415 // Module hasn't been loaded yet, we need to load it
416 const char* modptr = base + fileOffset;
417 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size());
418
419 // Now, load the bytecode module to get the ModuleProvider
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000420 std::string FullMemberName = archPath.toString() + "(" +
Reid Spencer1fce0912004-12-11 00:14:15 +0000421 mbr->getPath().toString() + ")";
Reid Spencerf9d7a512004-11-14 21:58:33 +0000422 ModuleProvider* mp = getBytecodeBufferModuleProvider(
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000423 (const unsigned char*) mbr->getData(), mbr->getSize(),
Reid Spencer6ab7a4f2004-11-17 18:25:21 +0000424 FullMemberName, 0);
Reid Spencerf9d7a512004-11-14 21:58:33 +0000425
Reid Spencer9a29db42004-11-20 07:29:40 +0000426 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr)));
Reid Spencerf9d7a512004-11-14 21:58:33 +0000427
428 return mp;
429}
430
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000431// Look up multiple symbols in the symbol table and return a set of
Reid Spencerf9d7a512004-11-14 21:58:33 +0000432// ModuleProviders that define those symbols.
433void
Reid Spencer7783e8a2004-11-19 03:18:22 +0000434Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
Reid Spencer766b7932004-11-15 01:20:11 +0000435 std::set<ModuleProvider*>& result)
Reid Spencerf9d7a512004-11-14 21:58:33 +0000436{
Reid Spencer766b7932004-11-15 01:20:11 +0000437 assert(mapfile && base && "Can't findModulesDefiningSymbols on new archive");
438 if (symTab.empty()) {
439 // We don't have a symbol table, so we must build it now but lets also
440 // make sure that we populate the modules table as we do this to ensure
441 // that we don't load them twice when findModuleDefiningSymbol is called
442 // below.
443
444 // Get a pointer to the first file
445 const char* At = ((const char*)base) + firstFileOffset;
446 const char* End = ((const char*)base) + mapfile->size();
447
448 while ( At < End) {
449 // Compute the offset to be put in the symbol table
450 unsigned offset = At - base - firstFileOffset;
451
452 // Parse the file's header
453 ArchiveMember* mbr = parseMemberHeader(At, End);
454
455 // If it contains symbols
456 if (mbr->isBytecode() || mbr->isCompressedBytecode()) {
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000457 // Get the symbols
Reid Spencer766b7932004-11-15 01:20:11 +0000458 std::vector<std::string> symbols;
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000459 std::string FullMemberName = archPath.toString() + "(" +
Reid Spencer1fce0912004-12-11 00:14:15 +0000460 mbr->getPath().toString() + ")";
Reid Spencer766b7932004-11-15 01:20:11 +0000461 ModuleProvider* MP = GetBytecodeSymbols((const unsigned char*)At,
Reid Spencer6ab7a4f2004-11-17 18:25:21 +0000462 mbr->getSize(), FullMemberName, symbols);
Reid Spencer766b7932004-11-15 01:20:11 +0000463
464 if (MP) {
465 // Insert the module's symbols into the symbol table
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000466 for (std::vector<std::string>::iterator I = symbols.begin(),
Reid Spencer766b7932004-11-15 01:20:11 +0000467 E=symbols.end(); I != E; ++I ) {
Reid Spencer9a29db42004-11-20 07:29:40 +0000468 symTab.insert(std::make_pair(*I, offset));
Reid Spencer766b7932004-11-15 01:20:11 +0000469 }
470 // Insert the ModuleProvider and the ArchiveMember into the table of
471 // modules.
Reid Spencer9a29db42004-11-20 07:29:40 +0000472 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr)));
Reid Spencer766b7932004-11-15 01:20:11 +0000473 } else {
474 throw std::string("Can't parse bytecode member: ") +
Reid Spencer1fce0912004-12-11 00:14:15 +0000475 mbr->getPath().toString();
Reid Spencer766b7932004-11-15 01:20:11 +0000476 }
477 }
Reid Spencerb3231132004-11-15 01:40:20 +0000478
479 // Go to the next file location
480 At += mbr->getSize();
Reid Spencer6405c9e2004-11-19 17:08:00 +0000481 if ((intptr_t(At) & 1) == 1)
Reid Spencerb3231132004-11-15 01:40:20 +0000482 At++;
Reid Spencer766b7932004-11-15 01:20:11 +0000483 }
484 }
485
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000486 // At this point we have a valid symbol table (one way or another) so we
Reid Spencer766b7932004-11-15 01:20:11 +0000487 // just use it to quickly find the symbols requested.
488
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000489 for (std::set<std::string>::iterator I=symbols.begin(),
Reid Spencer7783e8a2004-11-19 03:18:22 +0000490 E=symbols.end(); I != E;) {
491 // See if this symbol exists
Reid Spencerf9d7a512004-11-14 21:58:33 +0000492 ModuleProvider* mp = findModuleDefiningSymbol(*I);
493 if (mp) {
Reid Spencer7783e8a2004-11-19 03:18:22 +0000494 // The symbol exists, insert the ModuleProvider into our result,
495 // duplicates wil be ignored
Reid Spencer766b7932004-11-15 01:20:11 +0000496 result.insert(mp);
Reid Spencer7783e8a2004-11-19 03:18:22 +0000497
Misha Brukman2b37d7c2005-04-21 21:13:18 +0000498 // Remove the symbol now that its been resolved, being careful to
Reid Spencer57646ec2004-11-19 03:44:10 +0000499 // post-increment the iterator.
500 symbols.erase(I++);
Reid Spencer7783e8a2004-11-19 03:18:22 +0000501 } else {
502 ++I;
Chris Lattner968cfd02003-04-19 21:45:34 +0000503 }
Reid Spencerf9d7a512004-11-14 21:58:33 +0000504 }
Chris Lattner968cfd02003-04-19 21:45:34 +0000505}
Reid Spencereaa06bb2005-02-26 22:00:32 +0000506
Chris Lattner7c439922005-09-23 06:22:58 +0000507bool Archive::isBytecodeArchive() {
508 // Make sure the symTab has been loaded. In most cases this should have been
509 // done when the archive was constructed, but still, this is just in case.
510 if (!symTab.size())
Reid Spencereaa06bb2005-02-26 22:00:32 +0000511 loadSymbolTable();
512
Chris Lattner7c439922005-09-23 06:22:58 +0000513 // Now that we know it's been loaded, return true
514 // if it has a size
515 if (symTab.size()) return true;
Reid Spencereaa06bb2005-02-26 22:00:32 +0000516
517 //We still can't be sure it isn't a bytecode archive
518 loadArchive();
519
520 std::vector<Module *> Modules;
521 std::string ErrorMessage;
522
Chris Lattner7c439922005-09-23 06:22:58 +0000523 // Scan the archive, trying to load a bytecode member. We only load one to
524 // see if this works.
525 for (iterator I = begin(), E = end(); I != E; ++I) {
526 if (!I->isBytecode() && !I->isCompressedBytecode())
527 continue;
528
529 std::string FullMemberName =
530 archPath.toString() + "(" + I->getPath().toString() + ")";
531 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(),
532 I->getSize(), FullMemberName);
533 if (!M)
534 return false; // Couldn't parse bytecode, not a bytecode archive.
535 delete M;
536 return true;
537 }
538
539 return false;
Reid Spencereaa06bb2005-02-26 22:00:32 +0000540}