//===- Reader.cpp - Code to read bytecode files ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This library implements the functionality defined in llvm/Bytecode/Reader.h
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Allow passing in an option to ignore the symbol table
//
//===----------------------------------------------------------------------===//
18
19#include "AnalyzerInternals.h"
20#include "llvm/Module.h"
21#include "llvm/Bytecode/Format.h"
22#include "Support/StringExtras.h"
23#include <iostream>
24#include <sstream>
25
26using namespace llvm;
27
/// PARSE_ERROR - Build a diagnostic by streaming \p inserters into a
/// std::ostringstream and pass the resulting text to handler->handleError().
/// If the handler returns false, the message is thrown as a std::string;
/// otherwise execution continues after the macro.  The expansion requires a
/// pointer-like object named `handler` to be in scope.
///
/// The body is wrapped in do { ... } while (0) so that `PARSE_ERROR(...);`
/// is exactly one statement and can be used as the unbraced body of an `if`
/// that is followed by an `else`.
#define PARSE_ERROR(inserters)                                                 \
  do {                                                                         \
    std::ostringstream errormsg;                                               \
    errormsg << inserters;                                                     \
    if (!handler->handleError(errormsg.str()))                                 \
      throw std::string(errormsg.str());                                       \
  } while (0)
35
36const Type *AbstractBytecodeParser::getType(unsigned ID) {
37 //cerr << "Looking up Type ID: " << ID << "\n";
38
39 if (ID < Type::FirstDerivedTyID)
40 if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
41 return T; // Asked for a primitive type...
42
43 // Otherwise, derived types need offset...
44 ID -= Type::FirstDerivedTyID;
45
46 if (!CompactionTypeTable.empty()) {
47 if (ID >= CompactionTypeTable.size())
48 PARSE_ERROR("Type ID out of range for compaction table!");
49 return CompactionTypeTable[ID];
50 }
51
52 // Is it a module-level type?
53 if (ID < ModuleTypes.size())
54 return ModuleTypes[ID].get();
55
56 // Nope, is it a function-level type?
57 ID -= ModuleTypes.size();
58 if (ID < FunctionTypes.size())
59 return FunctionTypes[ID].get();
60
61 PARSE_ERROR("Illegal type reference!");
62 return Type::VoidTy;
63}
64
65bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
66 std::vector<unsigned> &Operands) {
67 Operands.clear();
68 unsigned iType = 0;
69 unsigned Opcode = 0;
70 unsigned Op = read(Buf, EndBuf);
71
72 // bits Instruction format: Common to all formats
73 // --------------------------
74 // 01-00: Opcode type, fixed to 1.
75 // 07-02: Opcode
76 Opcode = (Op >> 2) & 63;
77 Operands.resize((Op >> 0) & 03);
78
79 switch (Operands.size()) {
80 case 1:
81 // bits Instruction format:
82 // --------------------------
83 // 19-08: Resulting type plane
84 // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
85 //
86 iType = (Op >> 8) & 4095;
87 Operands[0] = (Op >> 20) & 4095;
88 if (Operands[0] == 4095) // Handle special encoding for 0 operands...
89 Operands.resize(0);
90 break;
91 case 2:
92 // bits Instruction format:
93 // --------------------------
94 // 15-08: Resulting type plane
95 // 23-16: Operand #1
96 // 31-24: Operand #2
97 //
98 iType = (Op >> 8) & 255;
99 Operands[0] = (Op >> 16) & 255;
100 Operands[1] = (Op >> 24) & 255;
101 break;
102 case 3:
103 // bits Instruction format:
104 // --------------------------
105 // 13-08: Resulting type plane
106 // 19-14: Operand #1
107 // 25-20: Operand #2
108 // 31-26: Operand #3
109 //
110 iType = (Op >> 8) & 63;
111 Operands[0] = (Op >> 14) & 63;
112 Operands[1] = (Op >> 20) & 63;
113 Operands[2] = (Op >> 26) & 63;
114 break;
115 case 0:
116 Buf -= 4; // Hrm, try this again...
117 Opcode = read_vbr_uint(Buf, EndBuf);
118 Opcode >>= 2;
119 iType = read_vbr_uint(Buf, EndBuf);
120
121 unsigned NumOperands = read_vbr_uint(Buf, EndBuf);
122 Operands.resize(NumOperands);
123
124 if (NumOperands == 0)
125 PARSE_ERROR("Zero-argument instruction found; this is invalid.");
126
127 for (unsigned i = 0; i != NumOperands; ++i)
128 Operands[i] = read_vbr_uint(Buf, EndBuf);
129 align32(Buf, EndBuf);
130 break;
131 }
132
133 return handler->handleInstruction(Opcode, getType(iType), Operands);
134}
135
136/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
137/// basicblock at a time. This method reads in one of the basicblock packets.
138void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf,
139 BufPtr EndBuf,
140 unsigned BlockNo) {
141 handler->handleBasicBlockBegin( BlockNo );
142
143 std::vector<unsigned> Args;
144 bool is_terminating = false;
145 while (Buf < EndBuf)
146 is_terminating = ParseInstruction(Buf, EndBuf, Args);
147
148 if ( ! is_terminating )
149 PARSE_ERROR(
150 "Failed to recognize instruction as terminating at end of block");
151
152 handler->handleBasicBlockEnd( BlockNo );
153}
154
155
156/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the
157/// body of a function. In post 1.0 bytecode files, we no longer emit basic
158/// block individually, in order to avoid per-basic-block overhead.
159unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, BufPtr EndBuf) {
160 unsigned BlockNo = 0;
161 std::vector<unsigned> Args;
162
163 while (Buf < EndBuf) {
164 handler->handleBasicBlockBegin( BlockNo );
165
166 // Read instructions into this basic block until we get to a terminator
167 bool is_terminating = false;
168 while (Buf < EndBuf && !is_terminating )
169 is_terminating = ParseInstruction(Buf, EndBuf, Args ) ;
170
171 if (!is_terminating)
172 PARSE_ERROR( "Non-terminated basic block found!");
173
174 handler->handleBasicBlockEnd( BlockNo );
175 ++BlockNo;
176 }
177 return BlockNo;
178}
179
180void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
181 handler->handleSymbolTableBegin();
182
183 while (Buf < EndBuf) {
184 // Symtab block header: [num entries][type id number]
185 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
186 unsigned Typ = read_vbr_uint(Buf, EndBuf);
187 const Type *Ty = getType(Typ);
188
189 handler->handleSymbolTablePlane( Typ, NumEntries, Ty );
190
191 for (unsigned i = 0; i != NumEntries; ++i) {
192 // Symtab entry: [def slot #][name]
193 unsigned slot = read_vbr_uint(Buf, EndBuf);
194 std::string Name = read_str(Buf, EndBuf);
195
196 if (Typ == Type::TypeTyID)
197 handler->handleSymbolTableType( i, slot, Name );
198 else
199 handler->handleSymbolTableValue( i, slot, Name );
200 }
201 }
202
203 if (Buf > EndBuf)
204 PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
205
206 handler->handleSymbolTableEnd();
207}
208
209void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
210 if (FunctionSignatureList.empty())
211 throw std::string("FunctionSignatureList empty!");
212
213 const Type *FType = FunctionSignatureList.back();
214 FunctionSignatureList.pop_back();
215
216 // Save the information for future reading of the function
217 LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf);
218 // Pretend we've `parsed' this function
219 Buf = EndBuf;
220}
221
222void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
223 // Find {start, end} pointers and slot in the map. If not there, we're done.
224 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType);
225
226 // Make sure we found it
227 if ( Fi == LazyFunctionLoadMap.end() ) {
228 PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
229 return;
230 }
231
232 BufPtr Buf = Fi->second.Buf;
233 BufPtr EndBuf = Fi->second.EndBuf;
234 assert(Fi->first == FType);
235
236 LazyFunctionLoadMap.erase(Fi);
237
238 this->ParseFunctionBody( FType, Buf, EndBuf );
239}
240
241void AbstractBytecodeParser::ParseFunctionBody(const Type* FType,
242 BufPtr &Buf, BufPtr EndBuf ) {
243
244 GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
245
246 unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
247 switch (LinkageType) {
248 case 0: Linkage = GlobalValue::ExternalLinkage; break;
249 case 1: Linkage = GlobalValue::WeakLinkage; break;
250 case 2: Linkage = GlobalValue::AppendingLinkage; break;
251 case 3: Linkage = GlobalValue::InternalLinkage; break;
252 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
253 default:
254 PARSE_ERROR("Invalid linkage type for Function.");
255 Linkage = GlobalValue::InternalLinkage;
256 break;
257 }
258
259 handler->handleFunctionBegin(FType,Linkage);
260
261 // Keep track of how many basic blocks we have read in...
262 unsigned BlockNum = 0;
263 bool InsertedArguments = false;
264
265 while (Buf < EndBuf) {
266 unsigned Type, Size;
267 BufPtr OldBuf = Buf;
268 readBlock(Buf, EndBuf, Type, Size);
269
270 switch (Type) {
271 case BytecodeFormat::ConstantPool:
272 ParseConstantPool(Buf, Buf+Size, FunctionTypes );
273 break;
274
275 case BytecodeFormat::CompactionTable:
276 ParseCompactionTable(Buf, Buf+Size);
277 break;
278
279 case BytecodeFormat::BasicBlock:
280 ParseBasicBlock(Buf, Buf+Size, BlockNum++);
281 break;
282
283 case BytecodeFormat::InstructionList:
284 if (BlockNum)
285 PARSE_ERROR("InstructionList must come before basic blocks!");
286 BlockNum = ParseInstructionList(Buf, Buf+Size);
287 break;
288
289 case BytecodeFormat::SymbolTable:
290 ParseSymbolTable(Buf, Buf+Size );
291 break;
292
293 default:
294 Buf += Size;
295 if (OldBuf > Buf)
296 PARSE_ERROR("Wrapped around reading bytecode");
297 break;
298 }
299
300 // Malformed bc file if read past end of block.
301 align32(Buf, EndBuf);
302 }
303
304 handler->handleFunctionEnd(FType);
305
306 // Clear out function-level types...
307 FunctionTypes.clear();
308 CompactionTypeTable.clear();
309}
310
311void AbstractBytecodeParser::ParseAllFunctionBodies() {
312 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
313 LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
314
315 while ( Fi != Fe ) {
316 const Type* FType = Fi->first;
317 this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf);
318 }
319}
320
321void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
322
323 handler->handleCompactionTableBegin();
324
325 while (Buf != End) {
326 unsigned NumEntries = read_vbr_uint(Buf, End);
327 unsigned Ty;
328
329 if ((NumEntries & 3) == 3) {
330 NumEntries >>= 2;
331 Ty = read_vbr_uint(Buf, End);
332 } else {
333 Ty = NumEntries >> 2;
334 NumEntries &= 3;
335 }
336
337 handler->handleCompactionTablePlane( Ty, NumEntries );
338
339 if (Ty == Type::TypeTyID) {
340 for (unsigned i = 0; i != NumEntries; ++i) {
341 unsigned TypeSlot = read_vbr_uint(Buf,End);
342 const Type *Typ = getGlobalTableType(TypeSlot);
343 handler->handleCompactionTableType( i, TypeSlot, Typ );
344 }
345 } else {
346 const Type *Typ = getType(Ty);
347 // Push the implicit zero
348 for (unsigned i = 0; i != NumEntries; ++i) {
349 unsigned ValSlot = read_vbr_uint(Buf, End);
350 handler->handleCompactionTableValue( i, ValSlot, Typ );
351 }
352 }
353 }
354 handler->handleCompactionTableEnd();
355}
356
357const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
358 const unsigned char *EndBuf) {
359 unsigned PrimType = read_vbr_uint(Buf, EndBuf);
360
361 const Type *Val = 0;
362 if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType)))
363 return Val;
364
365 switch (PrimType) {
366 case Type::FunctionTyID: {
367 const Type *RetType = getType(read_vbr_uint(Buf, EndBuf));
368
369 unsigned NumParams = read_vbr_uint(Buf, EndBuf);
370
371 std::vector<const Type*> Params;
372 while (NumParams--)
373 Params.push_back(getType(read_vbr_uint(Buf, EndBuf)));
374
375 bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
376 if (isVarArg) Params.pop_back();
377
378 Type* result = FunctionType::get(RetType, Params, isVarArg);
379 handler->handleType( result );
380 return result;
381 }
382 case Type::ArrayTyID: {
383 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
384 const Type *ElementType = getType(ElTyp);
385
386 unsigned NumElements = read_vbr_uint(Buf, EndBuf);
387
388 BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size="
389 << NumElements << "\n");
390 Type* result = ArrayType::get(ElementType, NumElements);
391 handler->handleType( result );
392 return result;
393 }
394 case Type::StructTyID: {
395 std::vector<const Type*> Elements;
396 unsigned Typ = read_vbr_uint(Buf, EndBuf);
397 while (Typ) { // List is terminated by void/0 typeid
398 Elements.push_back(getType(Typ));
399 Typ = read_vbr_uint(Buf, EndBuf);
400 }
401
402 Type* result = StructType::get(Elements);
403 handler->handleType( result );
404 return result;
405 }
406 case Type::PointerTyID: {
407 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
408 BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n");
409 Type* result = PointerType::get(getType(ElTyp));
410 handler->handleType( result );
411 return result;
412 }
413
414 case Type::OpaqueTyID: {
415 Type* result = OpaqueType::get();
416 handler->handleType( result );
417 return result;
418 }
419
420 default:
421 PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n");
422 return Val;
423 }
424}
425
426// ParseTypeConstants - We have to use this weird code to handle recursive
427// types. We know that recursive types will only reference the current slab of
428// values in the type plane, but they can forward reference types before they
429// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might
430// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix
431// this ugly problem, we pessimistically insert an opaque type for each type we
432// are about to read. This means that forward references will resolve to
433// something and when we reread the type later, we can replace the opaque type
434// with a new resolved concrete type.
435//
436void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
437 const unsigned char *EndBuf,
438 TypeListTy &Tab,
439 unsigned NumEntries) {
440 assert(Tab.size() == 0 && "should not have read type constants in before!");
441
442 // Insert a bunch of opaque types to be resolved later...
443 Tab.reserve(NumEntries);
444 for (unsigned i = 0; i != NumEntries; ++i)
445 Tab.push_back(OpaqueType::get());
446
447 // Loop through reading all of the types. Forward types will make use of the
448 // opaque types just inserted.
449 //
450 for (unsigned i = 0; i != NumEntries; ++i) {
451 const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get();
452 if (NewTy == 0) throw std::string("Couldn't parse type!");
453 BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy <<
454 "' Replacing: " << OldTy << "\n");
455
456 // Don't insertValue the new type... instead we want to replace the opaque
457 // type with the new concrete value...
458 //
459
460 // Refine the abstract type to the new type. This causes all uses of the
461 // abstract type to use NewTy. This also will cause the opaque type to be
462 // deleted...
463 //
464 cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
465
466 // This should have replace the old opaque type with the new type in the
467 // value table... or with a preexisting type that was already in the system
468 assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
469 }
470
471 BCR_TRACE(5, "Resulting types:\n");
472 for (unsigned i = 0; i < NumEntries; ++i) {
473 BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n");
474 }
475}
476
477
478void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
479 const unsigned char *EndBuf,
480 unsigned TypeID) {
481
482 // We must check for a ConstantExpr before switching by type because
483 // a ConstantExpr can be of any type, and has no explicit value.
484 //
485 // 0 if not expr; numArgs if is expr
486 unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
487
488 if (isExprNumArgs) {
489 unsigned Opcode = read_vbr_uint(Buf, EndBuf);
490 const Type* Typ = getType(TypeID);
491
492 // FIXME: Encoding of constant exprs could be much more compact!
493 std::vector<std::pair<const Type*,unsigned> > ArgVec;
494 ArgVec.reserve(isExprNumArgs);
495
496 // Read the slot number and types of each of the arguments
497 for (unsigned i = 0; i != isExprNumArgs; ++i) {
498 unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
499 unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
500 BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
501 << "' slot: " << ArgValSlot << "\n");
502
503 // Get the arg value from its slot if it exists, otherwise a placeholder
504 ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
505 }
506
507 handler->handleConstantExpression( Opcode, Typ, ArgVec );
508 return;
509 }
510
511 // Ok, not an ConstantExpr. We now know how to read the given type...
512 const Type *Ty = getType(TypeID);
513 switch (Ty->getPrimitiveID()) {
514 case Type::BoolTyID: {
515 unsigned Val = read_vbr_uint(Buf, EndBuf);
516 if (Val != 0 && Val != 1)
517 PARSE_ERROR("Invalid boolean value read.");
518
519 handler->handleConstantValue( ConstantBool::get(Val == 1));
520 break;
521 }
522
523 case Type::UByteTyID: // Unsigned integer types...
524 case Type::UShortTyID:
525 case Type::UIntTyID: {
526 unsigned Val = read_vbr_uint(Buf, EndBuf);
527 if (!ConstantUInt::isValueValidForType(Ty, Val))
528 throw std::string("Invalid unsigned byte/short/int read.");
529 handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
530 break;
531 }
532
533 case Type::ULongTyID: {
534 handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
535 break;
536 }
537
538 case Type::SByteTyID: // Signed integer types...
539 case Type::ShortTyID:
540 case Type::IntTyID: {
541 case Type::LongTyID:
542 int64_t Val = read_vbr_int64(Buf, EndBuf);
543 if (!ConstantSInt::isValueValidForType(Ty, Val))
544 throw std::string("Invalid signed byte/short/int/long read.");
545 handler->handleConstantValue( ConstantSInt::get(Ty, Val) );
546 break;
547 }
548
549 case Type::FloatTyID: {
550 float F;
551 input_data(Buf, EndBuf, &F, &F+1);
552 handler->handleConstantValue( ConstantFP::get(Ty, F) );
553 break;
554 }
555
556 case Type::DoubleTyID: {
557 double Val;
558 input_data(Buf, EndBuf, &Val, &Val+1);
559 handler->handleConstantValue( ConstantFP::get(Ty, Val) );
560 break;
561 }
562
563 case Type::TypeTyID:
564 PARSE_ERROR("Type constants shouldn't live in constant table!");
565 break;
566
567 case Type::ArrayTyID: {
568 const ArrayType *AT = cast<ArrayType>(Ty);
569 unsigned NumElements = AT->getNumElements();
570 std::vector<unsigned> Elements;
571 Elements.reserve(NumElements);
572 while (NumElements--) // Read all of the elements of the constant.
573 Elements.push_back(read_vbr_uint(Buf, EndBuf));
574
575 handler->handleConstantArray( AT, Elements );
576 break;
577 }
578
579 case Type::StructTyID: {
580 const StructType *ST = cast<StructType>(Ty);
581 std::vector<unsigned> Elements;
582 Elements.reserve(ST->getNumElements());
583 for (unsigned i = 0; i != ST->getNumElements(); ++i)
584 Elements.push_back(read_vbr_uint(Buf, EndBuf));
585
586 handler->handleConstantStruct( ST, Elements );
587 }
588
589 case Type::PointerTyID: { // ConstantPointerRef value...
590 const PointerType *PT = cast<PointerType>(Ty);
591 unsigned Slot = read_vbr_uint(Buf, EndBuf);
592 handler->handleConstantPointer( PT, Slot );
593 }
594
595 default:
596 PARSE_ERROR("Don't know how to deserialize constant value of type '"+
597 Ty->getDescription());
598 }
599}
600
601void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
602 const unsigned char *EndBuf) {
603 ParseConstantPool(Buf, EndBuf, ModuleTypes);
604}
605
606void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
607 const unsigned char *EndBuf,
608 unsigned NumEntries ){
609 for (; NumEntries; --NumEntries) {
610 unsigned Typ = read_vbr_uint(Buf, EndBuf);
611 const Type *Ty = getType(Typ);
612 if (!isa<ArrayType>(Ty))
613 throw std::string("String constant data invalid!");
614
615 const ArrayType *ATy = cast<ArrayType>(Ty);
616 if (ATy->getElementType() != Type::SByteTy &&
617 ATy->getElementType() != Type::UByteTy)
618 throw std::string("String constant data invalid!");
619
620 // Read character data. The type tells us how long the string is.
621 char Data[ATy->getNumElements()];
622 input_data(Buf, EndBuf, Data, Data+ATy->getNumElements());
623
624 std::vector<Constant*> Elements(ATy->getNumElements());
625 if (ATy->getElementType() == Type::SByteTy)
626 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
627 Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
628 else
629 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
630 Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
631
632 // Create the constant, inserting it as needed.
633 ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
634 handler->handleConstantString( C );
635 }
636}
637
638
639void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
640 const unsigned char *EndBuf,
641 TypeListTy &TypeTab) {
642 while (Buf < EndBuf) {
643 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
644 unsigned Typ = read_vbr_uint(Buf, EndBuf);
645 if (Typ == Type::TypeTyID) {
646 ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries);
647 } else if (Typ == Type::VoidTyID) {
648 ParseStringConstants(Buf, EndBuf, NumEntries);
649 } else {
650 BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: "
651 << NumEntries << "\n");
652
653 for (unsigned i = 0; i < NumEntries; ++i) {
654 ParseConstantValue(Buf, EndBuf, Typ);
655 }
656 }
657 }
658
659 if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer.");
660}
661
662void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
663
664 handler->handleModuleGlobalsBegin();
665
666 // Read global variables...
667 unsigned VarType = read_vbr_uint(Buf, End);
668 while (VarType != Type::VoidTyID) { // List is terminated by Void
669 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
670 // Linkage, bit4+ = slot#
671 unsigned SlotNo = VarType >> 5;
672 unsigned LinkageID = (VarType >> 2) & 7;
673 bool isConstant = VarType & 1;
674 bool hasInitializer = VarType & 2;
675 GlobalValue::LinkageTypes Linkage;
676
677 switch (LinkageID) {
678 case 0: Linkage = GlobalValue::ExternalLinkage; break;
679 case 1: Linkage = GlobalValue::WeakLinkage; break;
680 case 2: Linkage = GlobalValue::AppendingLinkage; break;
681 case 3: Linkage = GlobalValue::InternalLinkage; break;
682 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
683 default:
684 PARSE_ERROR("Unknown linkage type: " << LinkageID);
685 Linkage = GlobalValue::InternalLinkage;
686 break;
687 }
688
689 const Type *Ty = getType(SlotNo);
690 if ( !Ty ) {
691 PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
692 }
693
694 if ( !isa<PointerType>(Ty)) {
695 PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
696 }
697
698 const Type *ElTy = cast<PointerType>(Ty)->getElementType();
699
700 // Create the global variable...
701 if (hasInitializer)
702 handler->handleGlobalVariable( ElTy, isConstant, Linkage );
703 else {
704 unsigned initSlot = read_vbr_uint(Buf,End);
705 handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
706 }
707
708 // Get next item
709 VarType = read_vbr_uint(Buf, End);
710 }
711
712 // Read the function objects for all of the functions that are coming
713 unsigned FnSignature = read_vbr_uint(Buf, End);
714 while (FnSignature != Type::VoidTyID) { // List is terminated by Void
715 const Type *Ty = getType(FnSignature);
716 if (!isa<PointerType>(Ty) ||
717 !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
718 PARSE_ERROR( "Function not a pointer to function type! Ty = " +
719 Ty->getDescription());
720 // FIXME: what should Ty be if handler continues?
721 }
722
723 // We create functions by passing the underlying FunctionType to create...
724 Ty = cast<PointerType>(Ty)->getElementType();
725
726 // Save this for later so we know type of lazily instantiated functions
727 FunctionSignatureList.push_back(Ty);
728
729 handler->handleFunctionDeclaration(Ty);
730
731 // Get Next function signature
732 FnSignature = read_vbr_uint(Buf, End);
733 }
734
735 if (hasInconsistentModuleGlobalInfo)
736 align32(Buf, End);
737
738 // This is for future proofing... in the future extra fields may be added that
739 // we don't understand, so we transparently ignore them.
740 //
741 Buf = End;
742
743 handler->handleModuleGlobalsEnd();
744}
745
746void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
747 unsigned Version = read_vbr_uint(Buf, EndBuf);
748
749 // Unpack version number: low four bits are for flags, top bits = version
750 Module::Endianness Endianness;
751 Module::PointerSize PointerSize;
752 Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
753 PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
754
755 bool hasNoEndianness = Version & 4;
756 bool hasNoPointerSize = Version & 8;
757
758 RevisionNum = Version >> 4;
759
760 // Default values for the current bytecode version
761 hasInconsistentModuleGlobalInfo = false;
762 hasExplicitPrimitiveZeros = false;
763 hasRestrictedGEPTypes = false;
764
765 switch (RevisionNum) {
766 case 0: // LLVM 1.0, 1.1 release version
767 // Base LLVM 1.0 bytecode format.
768 hasInconsistentModuleGlobalInfo = true;
769 hasExplicitPrimitiveZeros = true;
770 // FALL THROUGH
771 case 1: // LLVM 1.2 release version
772 // LLVM 1.2 added explicit support for emitting strings efficiently.
773
774 // Also, it fixed the problem where the size of the ModuleGlobalInfo block
775 // included the size for the alignment at the end, where the rest of the
776 // blocks did not.
777
778 // LLVM 1.2 and before required that GEP indices be ubyte constants for
779 // structures and longs for sequential types.
780 hasRestrictedGEPTypes = true;
781
782 // FALL THROUGH
783 case 2: // LLVM 1.3 release version
784 break;
785
786 default:
787 PARSE_ERROR("Unknown bytecode version number: " << RevisionNum);
788 }
789
790 if (hasNoEndianness) Endianness = Module::AnyEndianness;
791 if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
792
793 handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
794}
795
796void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
797 unsigned Type, Size;
798 readBlock(Buf, EndBuf, Type, Size);
799 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
800 // Hrm, not a class?
801 PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
802 ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf)));
803
804 // Read into instance variables...
805 ParseVersionInfo(Buf, EndBuf);
806 align32(Buf, EndBuf);
807
808 bool SeenModuleGlobalInfo = false;
809 bool SeenGlobalTypePlane = false;
810 while (Buf < EndBuf) {
811 BufPtr OldBuf = Buf;
812 readBlock(Buf, EndBuf, Type, Size);
813
814 switch (Type) {
815
816 case BytecodeFormat::GlobalTypePlane:
817 if ( SeenGlobalTypePlane )
818 PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
819
820 ParseGlobalTypes(Buf, Buf+Size);
821 SeenGlobalTypePlane = true;
822 break;
823
824 case BytecodeFormat::ModuleGlobalInfo:
825 if ( SeenModuleGlobalInfo )
826 PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
827 ParseModuleGlobalInfo(Buf, Buf+Size);
828 SeenModuleGlobalInfo = true;
829 break;
830
831 case BytecodeFormat::ConstantPool:
832 ParseConstantPool(Buf, Buf+Size, ModuleTypes);
833 break;
834
835 case BytecodeFormat::Function:
836 ParseFunctionLazily(Buf, Buf+Size);
837 break;
838
839 case BytecodeFormat::SymbolTable:
840 ParseSymbolTable(Buf, Buf+Size );
841 break;
842
843 default:
844 Buf += Size;
845 if (OldBuf > Buf)
846 {
847 PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" );
848 }
849 break;
850 }
851 align32(Buf, EndBuf);
852 }
853}
854
855void AbstractBytecodeParser::ParseBytecode(
856 BufPtr Buf, unsigned Length,
857 const std::string &ModuleID) {
858
859 handler->handleStart();
860 unsigned char *EndBuf = (unsigned char*)(Buf + Length);
861
862 // Read and check signature...
863 unsigned Sig = read(Buf, EndBuf);
864 if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
865 PARSE_ERROR("Invalid bytecode signature: " << Sig);
866 }
867
868 handler->handleModuleBegin(ModuleID);
869
870 this->ParseModule(Buf, EndBuf);
871
872 handler->handleModuleEnd(ModuleID);
873
874 handler->handleFinish();
875}
876
877// vim: sw=2