blob: ae1a933078918a2fecfa834a21ced23b4ccab14a [file] [log] [blame]
Reid Spencerf9d7a512004-11-14 21:58:33 +00001//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
John Criswellb576c942003-10-20 19:43:21 +00002//
3// The LLVM Compiler Infrastructure
4//
Reid Spencerf9d7a512004-11-14 21:58:33 +00005// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
John Criswellb576c942003-10-20 19:43:21 +00007//
8//===----------------------------------------------------------------------===//
Chris Lattner968cfd02003-04-19 21:45:34 +00009//
// Reads standard unix archive files (.a) containing LLVM bytecode.
Chris Lattner968cfd02003-04-19 21:45:34 +000011//
12//===----------------------------------------------------------------------===//
13
Reid Spencerf9d7a512004-11-14 21:58:33 +000014#include "ArchiveInternals.h"
Chris Lattner968cfd02003-04-19 21:45:34 +000015#include "llvm/Bytecode/Reader.h"
Reid Spencerf9d7a512004-11-14 21:58:33 +000016
Chris Lattner3446ae82004-01-10 19:00:15 +000017using namespace llvm;
Brian Gaeked0fde302003-11-11 22:41:34 +000018
Chris Lattner968cfd02003-04-19 21:45:34 +000019namespace {
Chris Lattner968cfd02003-04-19 21:45:34 +000020
/// Read a variable-bit-rate encoded unsigned integer.
///
/// Every byte contributes its low seven bits to the result, least significant
/// group first; a set high bit (0x80) means that another byte follows. At is
/// advanced past the bytes that were consumed and is never moved beyond End.
///
/// Throws std::string if the data runs out in the middle of a value, or if
/// the encoding has more continuation bytes than an unsigned can represent.
inline unsigned readInteger(const char*&At, const char*End) {
  // Number of bits in the result type (assuming 8-bit bytes).
  const unsigned ResultBits = sizeof(unsigned) * 8;
  unsigned Shift = 0;
  unsigned Result = 0;

  do {
    if (At == End)
      throw std::string("Ran out of data reading vbr_uint!");
    // Shifting by the width of the type or more is undefined behaviour, so
    // an over-long (malformed) encoding has to be rejected explicitly.
    if (Shift >= ResultBits)
      throw std::string("vbr_uint is too large!");
    Result |= (unsigned)((*At++) & 0x7F) << Shift;
    Shift += 7;
  } while (At[-1] & 0x80);
  return Result;
}
34
Chris Lattner968cfd02003-04-19 21:45:34 +000035}
36
Reid Spencerf9d7a512004-11-14 21:58:33 +000037// Completely parse the Archive's symbol table and populate symTab member var.
38void
39Archive::parseSymbolTable(const void* data, unsigned size) {
40 const char* At = (const char*) data;
41 const char* End = At + size;
42 while (At < End) {
43 unsigned offset = readInteger(At, End);
44 unsigned length = readInteger(At, End);
45 if (At + length > End)
46 throw std::string("malformed symbol table");
47 // we don't care if it can't be inserted (duplicate entry)
48 symTab.insert(std::make_pair(std::string(At,length),offset));
49 At += length;
50 }
51 symTabSize = size;
Chris Lattner968cfd02003-04-19 21:45:34 +000052}
53
Reid Spencerf9d7a512004-11-14 21:58:33 +000054// This member parses an ArchiveMemberHeader that is presumed to be pointed to
55// by At. The At pointer is updated to the byte just after the header, which
56// can be variable in size.
57ArchiveMember*
58Archive::parseMemberHeader(const char*& At, const char* End) {
59 assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data");
60
61 // Cast archive member header
62 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
63 At += sizeof(ArchiveMemberHeader);
64
65 // Instantiate the ArchiveMember to be filled
66 ArchiveMember* member = new ArchiveMember(this);
67
68 // Extract the size and determine if the file is
69 // compressed or not (negative length).
70 int flags = 0;
71 int MemberSize = atoi(Hdr->size);
72 if (MemberSize < 0) {
73 flags |= ArchiveMember::CompressedFlag;
74 MemberSize = -MemberSize;
75 }
76
77 // Check the size of the member for sanity
78 if (At + MemberSize > End)
79 throw std::string("invalid member length in archive file");
80
81 // Check the member signature
82 if (!Hdr->checkSignature())
83 throw std::string("invalid file member signature");
84
85 // Convert and check the member name
86 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
87 // table. The special name "//" and 14 blanks is for a string table, used
88 // for long file names. This library doesn't generate either of those but
89 // it will accept them. If the name starts with #1/ and the remainder is
90 // digits, then those digits specify the length of the name that is
91 // stored immediately following the header. The special name
92 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode.
93 // Anything else is a regular, short filename that is terminated with
94 // a '/' and blanks.
95
96 std::string pathname;
97 unsigned index;
98 switch (Hdr->name[0]) {
99 case '#':
100 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
101 if (isdigit(Hdr->name[3])) {
102 unsigned len = atoi(&Hdr->name[3]);
103 pathname.assign(At,len);
104 At += len + 1; // terminated by \n
105 flags |= ArchiveMember::HasLongFilenameFlag;
106 } else
107 throw std::string("invalid long filename");
108 } else if (Hdr->name[1] == '_' &&
109 (0==memcmp(Hdr->name,ARFILE_LLVM_SYMTAB_NAME,16))) {
110 // The member is using a long file name (>15 chars) format.
111 // This format is standard for 4.4BSD and Mac OSX operating
112 // systems. LLVM uses it similarly. In this format, the
113 // remainder of the name field (after #1/) specifies the
114 // length of the file name which occupy the first bytes of
115 // the member's data. The pathname already has the #1/ stripped.
116 pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
117 flags |= ArchiveMember::LLVMSymbolTableFlag;
118 }
119 break;
120 case '/':
121 if (Hdr->name[1]== '/') {
122 if (0==memcmp(Hdr->name,ARFILE_STRTAB_NAME,16)) {
123 pathname.assign(ARFILE_STRTAB_NAME);
124 flags |= ArchiveMember::StringTableFlag;
125 } else {
126 throw std::string("invalid string table name");
127 }
128 } else if (Hdr->name[1] == ' ') {
129 if (0==memcmp(Hdr->name,ARFILE_SYMTAB_NAME,16)) {
130 pathname.assign(ARFILE_SYMTAB_NAME);
131 flags |= ArchiveMember::ForeignSymbolTableFlag;
132 } else {
133 throw std::string("invalid foreign symbol table name");
134 }
135 } else if (isdigit(Hdr->name[1])) {
136 unsigned index = atoi(&Hdr->name[1]);
137 if (index < strtab.length()) {
138 const char* namep = strtab.c_str() + index;
139 const char* endp = strtab.c_str() + strtab.length();
140 const char* p = namep;
141 const char* last_p = p;
142 while (p < endp) {
143 if (*p == '\n' && *last_p == '/') {
144 pathname.assign(namep,last_p-namep);
145 flags |= ArchiveMember::HasLongFilenameFlag;
146 break;
147 }
148 last_p = p;
149 p++;
150 }
151 if (p >= endp)
152 throw std::string("missing name termiantor in string table");
153 } else {
154 throw std::string("name index beyond string table");
155 }
156 }
157 break;
158
159 default:
160 char* slash = (char*) memchr(Hdr->name,'/',16);
161 if (slash == 0)
162 throw std::string("missing name terminator");
163 pathname.assign(Hdr->name,slash-Hdr->name);
164 break;
165 }
166
167 // Determine if this is a bytecode file
168 switch (sys::IdentifyFileType(At,4)) {
169 case sys::BytecodeFileType:
170 flags |= ArchiveMember::BytecodeFlag;
171 break;
172 case sys::CompressedBytecodeFileType:
173 flags |= ArchiveMember::CompressedBytecodeFlag;
174 flags &= ~ArchiveMember::CompressedFlag;
175 break;
176 default:
177 flags &= ~(ArchiveMember::BytecodeFlag|
178 ArchiveMember::CompressedBytecodeFlag);
179 break;
180 }
181
182 // Fill in fields of the ArchiveMember
183 member->next = 0;
184 member->prev = 0;
185 member->parent = this;
186 member->path.setFile(pathname);
187 member->info.fileSize = MemberSize;
188 member->info.modTime.fromEpochTime(atoi(Hdr->date));
189 sscanf(Hdr->mode,"%o",&(member->info.mode));
190 member->info.user = atoi(Hdr->uid);
191 member->info.group = atoi(Hdr->gid);
192 member->flags = flags;
193 member->data = At;
194
195 return member;
196}
197
198void
199Archive::checkSignature() {
200 // Check the magic string at file's header
201 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC,8))
202 throw std::string("invalid signature for an archive file");
203}
204
205// This function loads the entire archive and fully populates its ilist with
206// the members of the archive file. This is typically used in preparation for
207// editing the contents of the archive.
208void
209Archive::loadArchive() {
210
211 // Set up parsing
212 members.clear();
213 symTab.clear();
214 const char *At = base;
215 const char *End = base + mapfile->size();
216
217 checkSignature();
218 At += 8; // Skip the magic string.
219
220 bool seenSymbolTable = false;
221 bool foundFirstFile = false;
222 while (At < End) {
223 // parse the member header
224 const char* Save = At;
225 ArchiveMember* mbr = parseMemberHeader(At, End);
226
227 // check if this is the foreign symbol table
228 if (mbr->isForeignSymbolTable()) {
229 // We don't do anything with this but delete it
230 At += mbr->getSize();
231 delete mbr;
232 if ((int(At) & 1) == 1)
233 At++;
234 } else if (mbr->isStringTable()) {
235 strtab.assign(At,mbr->getSize());
236 At += mbr->getSize();
237 if ((int(At) & 1) == 1)
238 At++;
239 delete mbr;
240 } else if (mbr->isLLVMSymbolTable()) {
241 if (seenSymbolTable)
242 throw std::string("invalid archive: multiple symbol tables");
243 parseSymbolTable(mbr->getData(),mbr->getSize());
244 seenSymbolTable = true;
245 At += mbr->getSize();
246 if ((int(At) & 1) == 1)
247 At++;
248 delete mbr;
249 } else {
250 if (!foundFirstFile) {
251 firstFileOffset = Save - base;
252 foundFirstFile = true;
253 }
254 members.push_back(mbr);
255 At += mbr->getSize();
256 if ((int(At) & 1) == 1)
257 At++;
258 }
259 }
260}
261
262// Open and completely load the archive file.
263Archive*
264Archive::OpenAndLoad(const sys::Path& file) {
265
266 Archive* result = new Archive(file,true);
267
268 result->loadArchive();
269
270 return result;
271}
272
273// Get all the bytecode modules from the archive
274bool
275Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) {
276
277 for (iterator I=begin(), E=end(); I != E; ++I) {
278 if (I->isBytecode() || I->isCompressedBytecode()) {
279 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(),
280 I->getSize(), I->getPath().get(), ErrMessage);
281 if (!M)
282 return true;
283
284 Modules.push_back(M);
285 }
286 }
Brian Gaeke2c61d7b2003-11-16 23:08:48 +0000287 return false;
288}
Chris Lattner968cfd02003-04-19 21:45:34 +0000289
Reid Spencerf9d7a512004-11-14 21:58:33 +0000290// Load just the symbol table from the archive file
291void
292Archive::loadSymbolTable() {
Chris Lattner968cfd02003-04-19 21:45:34 +0000293
Reid Spencerf9d7a512004-11-14 21:58:33 +0000294 // Set up parsing
295 members.clear();
296 symTab.clear();
297 const char *At = base;
298 const char *End = base + mapfile->size();
Chris Lattner968cfd02003-04-19 21:45:34 +0000299
Reid Spencerf9d7a512004-11-14 21:58:33 +0000300 // Make sure we're dealing with an archive
301 checkSignature();
Chris Lattner968cfd02003-04-19 21:45:34 +0000302
Reid Spencerf9d7a512004-11-14 21:58:33 +0000303 At += 8; // Skip signature
304
305 // Parse the first file member header
306 const char* FirstFile = At;
307 ArchiveMember* mbr = parseMemberHeader(At, End);
308
309 if (mbr->isForeignSymbolTable()) {
310 // Skip the foreign symbol table, we don't do anything with it
311 At += mbr->getSize();
312 delete mbr;
313
314 // See if there's a string table too
315 FirstFile = At;
316 mbr = parseMemberHeader(At,End);
317 if (mbr->isStringTable()) {
318 strtab.assign((const char*)mbr->getData(),mbr->getSize());
319 At += mbr->getSize();
320 delete mbr;
321 FirstFile = At;
322 mbr = parseMemberHeader(At,End);
Brian Gaeke2c61d7b2003-11-16 23:08:48 +0000323 }
Chris Lattner968cfd02003-04-19 21:45:34 +0000324 }
325
Reid Spencerf9d7a512004-11-14 21:58:33 +0000326 // See if its the symbol table
327 if (mbr->isLLVMSymbolTable()) {
328 parseSymbolTable(mbr->getData(),mbr->getSize());
329 FirstFile = At + mbr->getSize();
330 if (mbr->getSize() % 2 != 0)
331 FirstFile++;
332 } else {
333 // There's no symbol table in the file. We have to rebuild it from scratch
334 // because the intent of this method is to get the symbol table loaded so
335 // it can be searched efficiently.
336 // Add the member to the members list
337 members.push_back(mbr);
338 }
339
340 firstFileOffset = FirstFile - base;
Chris Lattner968cfd02003-04-19 21:45:34 +0000341}
342
Reid Spencerf9d7a512004-11-14 21:58:33 +0000343// Open the archive and load just the symbol tables
344Archive*
345Archive::OpenAndLoadSymbols(const sys::Path& file) {
346 Archive* result = new Archive(file,true);
Chris Lattner968cfd02003-04-19 21:45:34 +0000347
Reid Spencerf9d7a512004-11-14 21:58:33 +0000348 result->loadSymbolTable();
Chris Lattnerb70abe12003-12-30 07:40:35 +0000349
Reid Spencerf9d7a512004-11-14 21:58:33 +0000350 return result;
351}
Chris Lattner968cfd02003-04-19 21:45:34 +0000352
Reid Spencerf9d7a512004-11-14 21:58:33 +0000353// Look up one symbol in the symbol table and return a ModuleProvider for the
354// module that defines that symbol.
355ModuleProvider*
356Archive::findModuleDefiningSymbol(const std::string& symbol) {
357 SymTabType::iterator SI = symTab.find(symbol);
358 if (SI == symTab.end())
359 return 0;
360
361 // The symbol table was previously constructed assuming that the members were
362 // written without the symbol table header. Because VBR encoding is used, the
363 // values could not be adjusted to account for the offset of the symbol table
364 // because that could affect the size of the symbol table due to VBR encoding.
365 // We now have to account for this by adjusting the offset by the size of the
366 // symbol table and its header.
367 unsigned fileOffset =
368 SI->second + // offset in symbol-table-less file
369 firstFileOffset; // add offset to first "real" file in archive
370
371 // See if the module is already loaded
372 ModuleMap::iterator MI = modules.find(fileOffset);
373 if (MI != modules.end())
374 return MI->second.first;
375
376 // Module hasn't been loaded yet, we need to load it
377 const char* modptr = base + fileOffset;
378 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size());
379
380 // Now, load the bytecode module to get the ModuleProvider
381 ModuleProvider* mp = getBytecodeBufferModuleProvider(
382 (const unsigned char*) mbr->getData(), mbr->getSize(),
383 mbr->getPath().get(), 0);
384
385 modules.insert(std::make_pair(fileOffset,std::make_pair(mp,mbr)));
386
387 return mp;
388}
389
390// Look up multiple symbols in the symbol table and return a set of
391// ModuleProviders that define those symbols.
392void
393Archive::findModulesDefiningSymbols(const std::set<std::string>& symbols,
394 std::set<ModuleProvider*>& modules)
395{
396 for (std::set<std::string>::const_iterator I=symbols.begin(),
397 E=symbols.end(); I != E; ++I) {
398 ModuleProvider* mp = findModuleDefiningSymbol(*I);
399 if (mp) {
400 modules.insert(mp);
Chris Lattner968cfd02003-04-19 21:45:34 +0000401 }
Reid Spencerf9d7a512004-11-14 21:58:33 +0000402 }
Chris Lattner968cfd02003-04-19 21:45:34 +0000403}