blob: c3f4c907fea88cc003d7f864e474257ea983fcbb [file] [log] [blame]
Chris Lattner00950542001-06-06 20:29:01 +00001//===- Reader.cpp - Code to read bytecode files -----------------------------===
2//
3// This library implements the functionality defined in llvm/Bytecode/Reader.h
4//
5// Note that this library should be as fast as possible, reentrant, and
6// threadsafe!!
7//
8// TODO: Make error message outputs be configurable depending on an option?
9// TODO: Allow passing in an option to ignore the symbol table
10//
11//===------------------------------------------------------------------------===
12
13#include "llvm/Bytecode/Reader.h"
14#include "llvm/Bytecode/Format.h"
15#include "llvm/Module.h"
16#include "llvm/BasicBlock.h"
17#include "llvm/DerivedTypes.h"
18#include "llvm/ConstPoolVals.h"
19#include "llvm/iOther.h"
20#include "ReaderInternals.h"
21#include <sys/types.h>
22#include <sys/mman.h>
23#include <sys/stat.h>
24#include <fcntl.h>
25#include <unistd.h>
26#include <algorithm>
27
28bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
29 if (Ty->isPrimitiveType()) {
30 Slot = Ty->getPrimitiveID();
31 } else {
32 TypeMapType::iterator I = TypeMap.find(Ty);
33 if (I == TypeMap.end()) return true; // Didn't find type!
34 Slot = I->second;
35 }
36 //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl;
37 return false;
38}
39
40const Type *BytecodeParser::getType(unsigned ID) {
41 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
42 if (T) return T;
43
44 //cerr << "Looking up Type ID: " << ID << endl;
45
46 const Value *D = getValue(Type::TypeTy, ID, false);
47 if (D == 0) return 0;
48
49 assert(D->getType() == Type::TypeTy &&
50 D->getValueType() == Value::ConstantVal);
51
52
53 return ((const ConstPoolType*)D)->getValue();;
54}
55
56bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) {
57 unsigned type;
58 if (getTypeSlot(Def->getType(), type)) return true;
59
60 if (ValueTab.size() <= type)
61 ValueTab.resize(type+1, ValueList());
62
63 //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
64 // << "] = " << Def << endl;
65
66 if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) {
67 const Type *Ty = ((const ConstPoolType*)Def)->getValue();
68 unsigned ValueOffset = FirstDerivedTyID;
69
70 if (&ValueTab == &Values) // Take into consideration module level types
71 ValueOffset += ModuleValues[type].size();
72
73 if (TypeMap.find(Ty) == TypeMap.end())
74 TypeMap[Ty] = ValueTab[type].size()+ValueOffset;
75 }
76
77 ValueTab[type].push_back(Def);
78
79 return false;
80}
81
82Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
83 unsigned Num = oNum;
84 unsigned type; // The type plane it lives in...
85
86 if (getTypeSlot(Ty, type)) return 0; // TODO: true
87
88 if (type == Type::TypeTyID) { // The 'type' plane has implicit values
89 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
90 if (T) return (Value*)T; // Asked for a primitive type...
91
92 // Otherwise, derived types need offset...
93 Num -= FirstDerivedTyID;
94 }
95
96 if (ModuleValues.size() > type) {
97 if (ModuleValues[type].size() > Num)
98 return ModuleValues[type][Num];
99 Num -= ModuleValues[type].size();
100 }
101
102 if (Values.size() > type && Values[type].size() > Num)
103 return Values[type][Num];
104
105 if (!Create) return 0; // Do not create a placeholder?
106
107 Value *d = 0;
108 switch (Ty->getPrimitiveID()) {
109 case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break;
110 case Type::MethodTyID:
111 cerr << "Creating method pholder! : " << type << ":" << oNum << " "
112 << Ty->getName() << endl;
113 d = new MethPHolder(Ty, oNum);
114 insertValue(d, LateResolveModuleValues);
115 return d;
116 default: d = new DefPHolder(Ty, oNum); break;
117 }
118
119 assert(d != 0 && "How did we not make something?");
120 if (insertValue(d, LateResolveValues)) return 0;
121 return d;
122}
123
124bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
125 bool Error = false;
126 for (unsigned ty = 0; ty < ValTab.size(); ty++) {
127 ValueList &DL = ValTab[ty];
128 unsigned Size;
129 while ((Size = DL.size())) {
130 unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
131
132 Value *D = DL[Size-1];
133 DL.pop_back();
134
135 Value *NewDef = getValue(D->getType(), IDNumber, false);
136 if (NewDef == 0) {
137 Error = true; // Unresolved thinger
138 cerr << "Unresolvable reference found: <" << D->getType()->getName()
139 << ">:" << IDNumber << "!\n";
140 } else {
141 // Fixup all of the uses of this placeholder def...
142 D->replaceAllUsesWith(NewDef);
143
144 // Now that all the uses are gone, delete the placeholder...
145 // If we couldn't find a def (error case), then leak a little
146 delete D; // memory, 'cause otherwise we can't remove all uses!
147 }
148 }
149 }
150
151 return Error;
152}
153
154bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
155 BasicBlock *&BB) {
156 BB = new BasicBlock();
157
158 while (Buf < EndBuf) {
159 Instruction *Def;
160 if (ParseInstruction(Buf, EndBuf, Def)) {
161 delete BB;
162 return true;
163 }
164
165 if (Def == 0) { delete BB; return true; }
166 if (insertValue(Def, Values)) { delete BB; return true; }
167
168 BB->getInstList().push_back(Def);
169 }
170
171 return false;
172}
173
174bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) {
175 while (Buf < EndBuf) {
176 // Symtab block header: [num entries][type id number]
177 unsigned NumEntries, Typ;
178 if (read_vbr(Buf, EndBuf, NumEntries) ||
179 read_vbr(Buf, EndBuf, Typ)) return true;
180 const Type *Ty = getType(Typ);
181 if (Ty == 0) return true;
182
183 for (unsigned i = 0; i < NumEntries; i++) {
184 // Symtab entry: [def slot #][name]
185 unsigned slot;
186 if (read_vbr(Buf, EndBuf, slot)) return true;
187 string Name;
188 if (read(Buf, EndBuf, Name, false)) // Not aligned...
189 return true;
190
191 Value *D = getValue(Ty, slot, false); // Find mapping...
192 if (D == 0) return true;
193 D->setName(Name);
194 }
195 }
196
197 return Buf > EndBuf;
198}
199
200
201bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
202 Module *C) {
203 // Clear out the local values table...
204 Values.clear();
205 if (MethodSignatureList.empty()) return true; // Unexpected method!
206
207 const MethodType *MTy = MethodSignatureList.front().first;
208 unsigned MethSlot = MethodSignatureList.front().second;
209 MethodSignatureList.pop_front();
210 Method *M = new Method(MTy);
211
212 const MethodType::ParamTypes &Params = MTy->getParamTypes();
213 for (MethodType::ParamTypes::const_iterator It = Params.begin();
214 It != Params.end(); It++) {
215 MethodArgument *MA = new MethodArgument(*It);
216 if (insertValue(MA, Values)) { delete M; return true; }
217 M->getArgumentList().push_back(MA);
218 }
219
220 while (Buf < EndBuf) {
221 unsigned Type, Size;
222 const uchar *OldBuf = Buf;
223 if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; }
224
225 switch (Type) {
226 case BytecodeFormat::ConstantPool:
227 if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) {
228 cerr << "Error reading constant pool!\n";
229 delete M; return true;
230 }
231 break;
232
233 case BytecodeFormat::BasicBlock: {
234 BasicBlock *BB;
235 if (ParseBasicBlock(Buf, Buf+Size, BB) ||
236 insertValue(BB, Values)) {
237 cerr << "Error parsing basic block!\n";
238 delete M; return true; // Parse error... :(
239 }
240
241 M->getBasicBlocks().push_back(BB);
242 break;
243 }
244
245 case BytecodeFormat::SymbolTable:
246 if (ParseSymbolTable(Buf, Buf+Size)) {
247 cerr << "Error reading method symbol table!\n";
248 delete M; return true;
249 }
250 break;
251
252 default:
253 Buf += Size;
254 if (OldBuf > Buf) return true; // Wrap around!
255 break;
256 }
257 if (align32(Buf, EndBuf)) {
258 delete M; // Malformed bc file, read past end of block.
259 return true;
260 }
261 }
262
263 if (postResolveValues(LateResolveValues) ||
264 postResolveValues(LateResolveModuleValues)) {
265 delete M; return true; // Unresolvable references!
266 }
267
268 Value *MethPHolder = getValue(MTy, MethSlot, false);
269 assert(MethPHolder && "Something is broken no placeholder found!");
270 assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?");
271
272 unsigned type; // Type slot
273 assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
274 getTypeSlot(MTy, type);
275
276 C->getMethodList().push_back(M);
277
278 // Replace placeholder with the real method pointer...
279 ModuleValues[type][MethSlot] = M;
280
281 // If anyone is using the placeholder make them use the real method instead
282 MethPHolder->replaceAllUsesWith(M);
283
284 // We don't need the placeholder anymore!
285 delete MethPHolder;
286
287 return false;
288}
289
290bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
291 Module *C) {
292
293 if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks?
294
295 // Read the method signatures for all of the methods that are coming, and
296 // create fillers in the Value tables.
297 unsigned MethSignature;
298 if (read_vbr(Buf, End, MethSignature)) return true;
299 while (MethSignature != Type::VoidTyID) { // List is terminated by Void
300 const Type *Ty = getType(MethSignature);
301 if (!Ty || !Ty->isMethodType()) {
302 cerr << "Method not meth type! ";
303 if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl;
304 return true;
305 }
306
307 // When the ModuleGlobalInfo section is read, we load the type of each method
308 // and the 'ModuleValues' slot that it lands in. We then load a placeholder
309 // into its slot to reserve it. When the method is loaded, this placeholder
310 // is replaced.
311
312 // Insert the placeholder...
313 Value *Def = new MethPHolder(Ty, 0);
314 insertValue(Def, ModuleValues);
315
316 // Figure out which entry of its typeslot it went into...
317 unsigned TypeSlot;
318 if (getTypeSlot(Def->getType(), TypeSlot)) return true;
319
320 unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
321
322 // Keep track of this information in a linked list that is emptied as
323 // methods are loaded...
324 //
325 MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo));
326 if (read_vbr(Buf, End, MethSignature)) return true;
327 }
328
329 if (align32(Buf, End)) return true;
330
331 // This is for future proofing... in the future extra fields may be added that
332 // we don't understand, so we transparently ignore them.
333 //
334 Buf = End;
335 return false;
336}
337
338bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
339 Module *&C) {
340
341 unsigned Type, Size;
342 if (readBlock(Buf, EndBuf, Type, Size)) return true;
343 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
344 return true; // Hrm, not a class?
345
346 MethodSignatureList.clear(); // Just in case...
347
348 // Read into instance variables...
349 if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
350 if (align32(Buf, EndBuf)) return true;
351
352 C = new Module();
353
354 while (Buf < EndBuf) {
355 const uchar *OldBuf = Buf;
356 if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; }
357 switch (Type) {
358 case BytecodeFormat::ModuleGlobalInfo:
359 if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
360 cerr << "Error reading class global info section!\n";
361 delete C; return true;
362 }
363 break;
364
365 case BytecodeFormat::ConstantPool:
366 if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) {
367 cerr << "Error reading class constant pool!\n";
368 delete C; return true;
369 }
370 break;
371
372 case BytecodeFormat::Method: {
373 if (ParseMethod(Buf, Buf+Size, C)) {
374 delete C; return true; // Error parsing method
375 }
376 break;
377 }
378
379 case BytecodeFormat::SymbolTable:
380 if (ParseSymbolTable(Buf, Buf+Size)) {
381 cerr << "Error reading class symbol table!\n";
382 delete C; return true;
383 }
384 break;
385
386 default:
387 cerr << "Unknown class block: " << Type << endl;
388 Buf += Size;
389 if (OldBuf > Buf) return true; // Wrap around!
390 break;
391 }
392 if (align32(Buf, EndBuf)) { delete C; return true; }
393 }
394
395 if (!MethodSignatureList.empty()) // Expected more methods!
396 return true;
397 return false;
398}
399
400Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
401 LateResolveValues.clear();
402 unsigned Sig;
403 // Read and check signature...
404 if (read(Buf, EndBuf, Sig) ||
405 Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24))
406 return 0; // Invalid signature!
407
408 Module *Result;
409 if (ParseModule(Buf, EndBuf, Result)) return 0;
410 return Result;
411}
412
413
414Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
415 BytecodeParser Parser;
416 return Parser.ParseBytecode(Buffer, Buffer+Length);
417}
418
419// Parse and return a class file...
420//
421Module *ParseBytecodeFile(const string &Filename) {
422 struct stat StatBuf;
423 Module *Result = 0;
424
425 if (Filename != string("-")) { // Read from a file...
426 int FD = open(Filename.data(), O_RDONLY);
427 if (FD == -1) return 0;
428
429 if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }
430
431 int Length = StatBuf.st_size;
432 if (Length == 0) { close(FD); return 0; }
433 uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
434 MAP_PRIVATE, FD, 0);
435 if (Buffer == (uchar*)-1) { close(FD); return 0; }
436
437 BytecodeParser Parser;
438 Result = Parser.ParseBytecode(Buffer, Buffer+Length);
439
440 munmap((char*)Buffer, Length);
441 close(FD);
442 } else { // Read from stdin
443 size_t FileSize = 0;
444 int BlockSize;
445 uchar Buffer[4096], *FileData = 0;
446 while ((BlockSize = read(0, Buffer, 4))) {
447 if (BlockSize == -1) { free(FileData); return 0; }
448
449 FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
450 memcpy(FileData+FileSize, Buffer, BlockSize);
451 FileSize += BlockSize;
452 }
453
454 if (FileSize == 0) { free(FileData); return 0; }
455
456#define ALIGN_PTRS 1
457#if ALIGN_PTRS
458 uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
459 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
460 assert((Buf != (uchar*)-1) && "mmap returned error!");
461 free(FileData);
462 memcpy(Buf, FileData, FileSize);
463#else
464 uchar *Buf = FileData;
465#endif
466
467 BytecodeParser Parser;
468 Result = Parser.ParseBytecode(Buf, Buf+FileSize);
469
470#if ALIGN_PTRS
471 munmap((char*)Buf, FileSize); // Free mmap'd data area
472#else
473 free(FileData); // Free realloc'd block of memory
474#endif
475 }
476
477 return Result;
478}