blob: 45f761ea677d738288a3d7f49aba240f083246a9 [file] [log] [blame]
//===- Reader.cpp - Code to read bytecode files ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This library implements the functionality defined in llvm/Bytecode/Reader.h
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Allow passing in an option to ignore the symbol table
//
//===----------------------------------------------------------------------===//
18
19#include "AnalyzerInternals.h"
20#include "llvm/Module.h"
21#include "llvm/Bytecode/Format.h"
22#include "Support/StringExtras.h"
23#include <iostream>
24#include <sstream>
25
26using namespace llvm;
27
/// PARSE_ERROR - Build a diagnostic from a chain of stream inserters and hand
/// it to the `handler` object that is in scope where the macro is used.  If
/// handler->handleError() returns false the message is thrown as a
/// std::string; otherwise control continues after the macro.
///
/// The body is wrapped in do { } while (0) so that the macro expands to a
/// single statement and can be used safely in an unbraced if/else.  Always
/// terminate the invocation with a semicolon.
#define PARSE_ERROR(inserters)                        \
  do {                                                \
    std::ostringstream errormsg;                      \
    errormsg << inserters;                            \
    if ( ! handler->handleError( errormsg.str() ) )   \
      throw std::string(errormsg.str());              \
  } while (0)
35
36const Type *AbstractBytecodeParser::getType(unsigned ID) {
37 //cerr << "Looking up Type ID: " << ID << "\n";
38
39 if (ID < Type::FirstDerivedTyID)
40 if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
41 return T; // Asked for a primitive type...
42
43 // Otherwise, derived types need offset...
44 ID -= Type::FirstDerivedTyID;
45
46 if (!CompactionTypeTable.empty()) {
47 if (ID >= CompactionTypeTable.size())
48 PARSE_ERROR("Type ID out of range for compaction table!");
49 return CompactionTypeTable[ID];
50 }
51
52 // Is it a module-level type?
53 if (ID < ModuleTypes.size())
54 return ModuleTypes[ID].get();
55
56 // Nope, is it a function-level type?
57 ID -= ModuleTypes.size();
58 if (ID < FunctionTypes.size())
59 return FunctionTypes[ID].get();
60
61 PARSE_ERROR("Illegal type reference!");
62 return Type::VoidTy;
63}
64
65bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
66 std::vector<unsigned> &Operands) {
67 Operands.clear();
68 unsigned iType = 0;
69 unsigned Opcode = 0;
70 unsigned Op = read(Buf, EndBuf);
71
72 // bits Instruction format: Common to all formats
73 // --------------------------
74 // 01-00: Opcode type, fixed to 1.
75 // 07-02: Opcode
76 Opcode = (Op >> 2) & 63;
77 Operands.resize((Op >> 0) & 03);
78
79 switch (Operands.size()) {
80 case 1:
81 // bits Instruction format:
82 // --------------------------
83 // 19-08: Resulting type plane
84 // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
85 //
86 iType = (Op >> 8) & 4095;
87 Operands[0] = (Op >> 20) & 4095;
88 if (Operands[0] == 4095) // Handle special encoding for 0 operands...
89 Operands.resize(0);
90 break;
91 case 2:
92 // bits Instruction format:
93 // --------------------------
94 // 15-08: Resulting type plane
95 // 23-16: Operand #1
96 // 31-24: Operand #2
97 //
98 iType = (Op >> 8) & 255;
99 Operands[0] = (Op >> 16) & 255;
100 Operands[1] = (Op >> 24) & 255;
101 break;
102 case 3:
103 // bits Instruction format:
104 // --------------------------
105 // 13-08: Resulting type plane
106 // 19-14: Operand #1
107 // 25-20: Operand #2
108 // 31-26: Operand #3
109 //
110 iType = (Op >> 8) & 63;
111 Operands[0] = (Op >> 14) & 63;
112 Operands[1] = (Op >> 20) & 63;
113 Operands[2] = (Op >> 26) & 63;
114 break;
115 case 0:
116 Buf -= 4; // Hrm, try this again...
117 Opcode = read_vbr_uint(Buf, EndBuf);
118 Opcode >>= 2;
119 iType = read_vbr_uint(Buf, EndBuf);
120
121 unsigned NumOperands = read_vbr_uint(Buf, EndBuf);
122 Operands.resize(NumOperands);
123
124 if (NumOperands == 0)
125 PARSE_ERROR("Zero-argument instruction found; this is invalid.");
126
127 for (unsigned i = 0; i != NumOperands; ++i)
128 Operands[i] = read_vbr_uint(Buf, EndBuf);
129 align32(Buf, EndBuf);
130 break;
131 }
132
133 return handler->handleInstruction(Opcode, getType(iType), Operands);
134}
135
136/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
137/// basicblock at a time. This method reads in one of the basicblock packets.
138void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf,
139 BufPtr EndBuf,
140 unsigned BlockNo) {
141 handler->handleBasicBlockBegin( BlockNo );
142
143 std::vector<unsigned> Args;
144 bool is_terminating = false;
145 while (Buf < EndBuf)
146 is_terminating = ParseInstruction(Buf, EndBuf, Args);
147
148 if ( ! is_terminating )
149 PARSE_ERROR(
150 "Failed to recognize instruction as terminating at end of block");
151
152 handler->handleBasicBlockEnd( BlockNo );
153}
154
155
156/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the
157/// body of a function. In post 1.0 bytecode files, we no longer emit basic
158/// block individually, in order to avoid per-basic-block overhead.
Reid Spencer5e8868d2004-06-08 05:54:47 +0000159unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf,
160 BufPtr EndBuf) {
Reid Spencerdac69c82004-06-07 17:53:43 +0000161 unsigned BlockNo = 0;
162 std::vector<unsigned> Args;
163
164 while (Buf < EndBuf) {
165 handler->handleBasicBlockBegin( BlockNo );
166
167 // Read instructions into this basic block until we get to a terminator
168 bool is_terminating = false;
169 while (Buf < EndBuf && !is_terminating )
170 is_terminating = ParseInstruction(Buf, EndBuf, Args ) ;
171
172 if (!is_terminating)
173 PARSE_ERROR( "Non-terminated basic block found!");
174
175 handler->handleBasicBlockEnd( BlockNo );
176 ++BlockNo;
177 }
178 return BlockNo;
179}
180
181void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
182 handler->handleSymbolTableBegin();
183
184 while (Buf < EndBuf) {
185 // Symtab block header: [num entries][type id number]
186 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
187 unsigned Typ = read_vbr_uint(Buf, EndBuf);
188 const Type *Ty = getType(Typ);
189
190 handler->handleSymbolTablePlane( Typ, NumEntries, Ty );
191
192 for (unsigned i = 0; i != NumEntries; ++i) {
193 // Symtab entry: [def slot #][name]
194 unsigned slot = read_vbr_uint(Buf, EndBuf);
195 std::string Name = read_str(Buf, EndBuf);
196
197 if (Typ == Type::TypeTyID)
198 handler->handleSymbolTableType( i, slot, Name );
199 else
200 handler->handleSymbolTableValue( i, slot, Name );
201 }
202 }
203
204 if (Buf > EndBuf)
205 PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
206
207 handler->handleSymbolTableEnd();
208}
209
210void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
211 if (FunctionSignatureList.empty())
212 throw std::string("FunctionSignatureList empty!");
213
214 const Type *FType = FunctionSignatureList.back();
215 FunctionSignatureList.pop_back();
216
217 // Save the information for future reading of the function
218 LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf);
219 // Pretend we've `parsed' this function
220 Buf = EndBuf;
221}
222
223void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
224 // Find {start, end} pointers and slot in the map. If not there, we're done.
225 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType);
226
227 // Make sure we found it
228 if ( Fi == LazyFunctionLoadMap.end() ) {
229 PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
230 return;
231 }
232
233 BufPtr Buf = Fi->second.Buf;
234 BufPtr EndBuf = Fi->second.EndBuf;
235 assert(Fi->first == FType);
236
237 LazyFunctionLoadMap.erase(Fi);
238
239 this->ParseFunctionBody( FType, Buf, EndBuf );
240}
241
242void AbstractBytecodeParser::ParseFunctionBody(const Type* FType,
243 BufPtr &Buf, BufPtr EndBuf ) {
244
245 GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
246
247 unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
248 switch (LinkageType) {
249 case 0: Linkage = GlobalValue::ExternalLinkage; break;
250 case 1: Linkage = GlobalValue::WeakLinkage; break;
251 case 2: Linkage = GlobalValue::AppendingLinkage; break;
252 case 3: Linkage = GlobalValue::InternalLinkage; break;
253 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
254 default:
255 PARSE_ERROR("Invalid linkage type for Function.");
256 Linkage = GlobalValue::InternalLinkage;
257 break;
258 }
259
260 handler->handleFunctionBegin(FType,Linkage);
261
262 // Keep track of how many basic blocks we have read in...
263 unsigned BlockNum = 0;
264 bool InsertedArguments = false;
265
266 while (Buf < EndBuf) {
267 unsigned Type, Size;
268 BufPtr OldBuf = Buf;
269 readBlock(Buf, EndBuf, Type, Size);
270
271 switch (Type) {
272 case BytecodeFormat::ConstantPool:
273 ParseConstantPool(Buf, Buf+Size, FunctionTypes );
274 break;
275
276 case BytecodeFormat::CompactionTable:
277 ParseCompactionTable(Buf, Buf+Size);
278 break;
279
280 case BytecodeFormat::BasicBlock:
281 ParseBasicBlock(Buf, Buf+Size, BlockNum++);
282 break;
283
284 case BytecodeFormat::InstructionList:
285 if (BlockNum)
286 PARSE_ERROR("InstructionList must come before basic blocks!");
287 BlockNum = ParseInstructionList(Buf, Buf+Size);
288 break;
289
290 case BytecodeFormat::SymbolTable:
291 ParseSymbolTable(Buf, Buf+Size );
292 break;
293
294 default:
295 Buf += Size;
296 if (OldBuf > Buf)
297 PARSE_ERROR("Wrapped around reading bytecode");
298 break;
299 }
300
301 // Malformed bc file if read past end of block.
302 align32(Buf, EndBuf);
303 }
304
305 handler->handleFunctionEnd(FType);
306
307 // Clear out function-level types...
308 FunctionTypes.clear();
309 CompactionTypeTable.clear();
310}
311
312void AbstractBytecodeParser::ParseAllFunctionBodies() {
313 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
314 LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
315
316 while ( Fi != Fe ) {
317 const Type* FType = Fi->first;
318 this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf);
319 }
320}
321
322void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
323
324 handler->handleCompactionTableBegin();
325
326 while (Buf != End) {
327 unsigned NumEntries = read_vbr_uint(Buf, End);
328 unsigned Ty;
329
330 if ((NumEntries & 3) == 3) {
331 NumEntries >>= 2;
332 Ty = read_vbr_uint(Buf, End);
333 } else {
334 Ty = NumEntries >> 2;
335 NumEntries &= 3;
336 }
337
338 handler->handleCompactionTablePlane( Ty, NumEntries );
339
340 if (Ty == Type::TypeTyID) {
341 for (unsigned i = 0; i != NumEntries; ++i) {
342 unsigned TypeSlot = read_vbr_uint(Buf,End);
343 const Type *Typ = getGlobalTableType(TypeSlot);
344 handler->handleCompactionTableType( i, TypeSlot, Typ );
345 }
346 } else {
347 const Type *Typ = getType(Ty);
348 // Push the implicit zero
349 for (unsigned i = 0; i != NumEntries; ++i) {
350 unsigned ValSlot = read_vbr_uint(Buf, End);
351 handler->handleCompactionTableValue( i, ValSlot, Typ );
352 }
353 }
354 }
355 handler->handleCompactionTableEnd();
356}
357
358const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
359 const unsigned char *EndBuf) {
360 unsigned PrimType = read_vbr_uint(Buf, EndBuf);
361
362 const Type *Val = 0;
363 if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType)))
364 return Val;
365
366 switch (PrimType) {
367 case Type::FunctionTyID: {
368 const Type *RetType = getType(read_vbr_uint(Buf, EndBuf));
369
370 unsigned NumParams = read_vbr_uint(Buf, EndBuf);
371
372 std::vector<const Type*> Params;
373 while (NumParams--)
374 Params.push_back(getType(read_vbr_uint(Buf, EndBuf)));
375
376 bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
377 if (isVarArg) Params.pop_back();
378
379 Type* result = FunctionType::get(RetType, Params, isVarArg);
380 handler->handleType( result );
381 return result;
382 }
383 case Type::ArrayTyID: {
384 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
385 const Type *ElementType = getType(ElTyp);
386
387 unsigned NumElements = read_vbr_uint(Buf, EndBuf);
388
389 BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size="
390 << NumElements << "\n");
391 Type* result = ArrayType::get(ElementType, NumElements);
392 handler->handleType( result );
393 return result;
394 }
395 case Type::StructTyID: {
396 std::vector<const Type*> Elements;
397 unsigned Typ = read_vbr_uint(Buf, EndBuf);
398 while (Typ) { // List is terminated by void/0 typeid
399 Elements.push_back(getType(Typ));
400 Typ = read_vbr_uint(Buf, EndBuf);
401 }
402
403 Type* result = StructType::get(Elements);
404 handler->handleType( result );
405 return result;
406 }
407 case Type::PointerTyID: {
408 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
409 BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n");
410 Type* result = PointerType::get(getType(ElTyp));
411 handler->handleType( result );
412 return result;
413 }
414
415 case Type::OpaqueTyID: {
416 Type* result = OpaqueType::get();
417 handler->handleType( result );
418 return result;
419 }
420
421 default:
422 PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n");
423 return Val;
424 }
425}
426
427// ParseTypeConstants - We have to use this weird code to handle recursive
428// types. We know that recursive types will only reference the current slab of
429// values in the type plane, but they can forward reference types before they
430// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might
431// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix
432// this ugly problem, we pessimistically insert an opaque type for each type we
433// are about to read. This means that forward references will resolve to
434// something and when we reread the type later, we can replace the opaque type
435// with a new resolved concrete type.
436//
437void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
438 const unsigned char *EndBuf,
439 TypeListTy &Tab,
440 unsigned NumEntries) {
441 assert(Tab.size() == 0 && "should not have read type constants in before!");
442
443 // Insert a bunch of opaque types to be resolved later...
444 Tab.reserve(NumEntries);
445 for (unsigned i = 0; i != NumEntries; ++i)
446 Tab.push_back(OpaqueType::get());
447
448 // Loop through reading all of the types. Forward types will make use of the
449 // opaque types just inserted.
450 //
451 for (unsigned i = 0; i != NumEntries; ++i) {
452 const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get();
453 if (NewTy == 0) throw std::string("Couldn't parse type!");
454 BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy <<
455 "' Replacing: " << OldTy << "\n");
456
457 // Don't insertValue the new type... instead we want to replace the opaque
458 // type with the new concrete value...
459 //
460
461 // Refine the abstract type to the new type. This causes all uses of the
462 // abstract type to use NewTy. This also will cause the opaque type to be
463 // deleted...
464 //
465 cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
466
467 // This should have replace the old opaque type with the new type in the
468 // value table... or with a preexisting type that was already in the system
469 assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
470 }
471
472 BCR_TRACE(5, "Resulting types:\n");
473 for (unsigned i = 0; i < NumEntries; ++i) {
474 BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n");
475 }
476}
477
478
479void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
480 const unsigned char *EndBuf,
481 unsigned TypeID) {
482
483 // We must check for a ConstantExpr before switching by type because
484 // a ConstantExpr can be of any type, and has no explicit value.
485 //
486 // 0 if not expr; numArgs if is expr
487 unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
488
489 if (isExprNumArgs) {
490 unsigned Opcode = read_vbr_uint(Buf, EndBuf);
491 const Type* Typ = getType(TypeID);
492
493 // FIXME: Encoding of constant exprs could be much more compact!
494 std::vector<std::pair<const Type*,unsigned> > ArgVec;
495 ArgVec.reserve(isExprNumArgs);
496
497 // Read the slot number and types of each of the arguments
498 for (unsigned i = 0; i != isExprNumArgs; ++i) {
499 unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
500 unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
501 BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
502 << "' slot: " << ArgValSlot << "\n");
503
504 // Get the arg value from its slot if it exists, otherwise a placeholder
505 ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
506 }
507
508 handler->handleConstantExpression( Opcode, Typ, ArgVec );
509 return;
510 }
511
512 // Ok, not an ConstantExpr. We now know how to read the given type...
513 const Type *Ty = getType(TypeID);
514 switch (Ty->getPrimitiveID()) {
515 case Type::BoolTyID: {
516 unsigned Val = read_vbr_uint(Buf, EndBuf);
517 if (Val != 0 && Val != 1)
518 PARSE_ERROR("Invalid boolean value read.");
519
520 handler->handleConstantValue( ConstantBool::get(Val == 1));
521 break;
522 }
523
524 case Type::UByteTyID: // Unsigned integer types...
525 case Type::UShortTyID:
526 case Type::UIntTyID: {
527 unsigned Val = read_vbr_uint(Buf, EndBuf);
528 if (!ConstantUInt::isValueValidForType(Ty, Val))
529 throw std::string("Invalid unsigned byte/short/int read.");
530 handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
531 break;
532 }
533
534 case Type::ULongTyID: {
535 handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
536 break;
537 }
538
539 case Type::SByteTyID: // Signed integer types...
540 case Type::ShortTyID:
541 case Type::IntTyID: {
542 case Type::LongTyID:
543 int64_t Val = read_vbr_int64(Buf, EndBuf);
544 if (!ConstantSInt::isValueValidForType(Ty, Val))
545 throw std::string("Invalid signed byte/short/int/long read.");
546 handler->handleConstantValue( ConstantSInt::get(Ty, Val) );
547 break;
548 }
549
550 case Type::FloatTyID: {
551 float F;
552 input_data(Buf, EndBuf, &F, &F+1);
553 handler->handleConstantValue( ConstantFP::get(Ty, F) );
554 break;
555 }
556
557 case Type::DoubleTyID: {
558 double Val;
559 input_data(Buf, EndBuf, &Val, &Val+1);
560 handler->handleConstantValue( ConstantFP::get(Ty, Val) );
561 break;
562 }
563
564 case Type::TypeTyID:
565 PARSE_ERROR("Type constants shouldn't live in constant table!");
566 break;
567
568 case Type::ArrayTyID: {
569 const ArrayType *AT = cast<ArrayType>(Ty);
570 unsigned NumElements = AT->getNumElements();
571 std::vector<unsigned> Elements;
572 Elements.reserve(NumElements);
573 while (NumElements--) // Read all of the elements of the constant.
574 Elements.push_back(read_vbr_uint(Buf, EndBuf));
575
576 handler->handleConstantArray( AT, Elements );
577 break;
578 }
579
580 case Type::StructTyID: {
581 const StructType *ST = cast<StructType>(Ty);
582 std::vector<unsigned> Elements;
583 Elements.reserve(ST->getNumElements());
584 for (unsigned i = 0; i != ST->getNumElements(); ++i)
585 Elements.push_back(read_vbr_uint(Buf, EndBuf));
586
587 handler->handleConstantStruct( ST, Elements );
588 }
589
590 case Type::PointerTyID: { // ConstantPointerRef value...
591 const PointerType *PT = cast<PointerType>(Ty);
592 unsigned Slot = read_vbr_uint(Buf, EndBuf);
593 handler->handleConstantPointer( PT, Slot );
594 }
595
596 default:
597 PARSE_ERROR("Don't know how to deserialize constant value of type '"+
598 Ty->getDescription());
599 }
600}
601
602void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
603 const unsigned char *EndBuf) {
604 ParseConstantPool(Buf, EndBuf, ModuleTypes);
605}
606
607void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
608 const unsigned char *EndBuf,
609 unsigned NumEntries ){
610 for (; NumEntries; --NumEntries) {
611 unsigned Typ = read_vbr_uint(Buf, EndBuf);
612 const Type *Ty = getType(Typ);
613 if (!isa<ArrayType>(Ty))
614 throw std::string("String constant data invalid!");
615
616 const ArrayType *ATy = cast<ArrayType>(Ty);
617 if (ATy->getElementType() != Type::SByteTy &&
618 ATy->getElementType() != Type::UByteTy)
619 throw std::string("String constant data invalid!");
620
621 // Read character data. The type tells us how long the string is.
622 char Data[ATy->getNumElements()];
623 input_data(Buf, EndBuf, Data, Data+ATy->getNumElements());
624
625 std::vector<Constant*> Elements(ATy->getNumElements());
626 if (ATy->getElementType() == Type::SByteTy)
627 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
628 Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
629 else
630 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
631 Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
632
633 // Create the constant, inserting it as needed.
634 ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
635 handler->handleConstantString( C );
636 }
637}
638
639
640void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
641 const unsigned char *EndBuf,
642 TypeListTy &TypeTab) {
643 while (Buf < EndBuf) {
644 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
645 unsigned Typ = read_vbr_uint(Buf, EndBuf);
646 if (Typ == Type::TypeTyID) {
647 ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries);
648 } else if (Typ == Type::VoidTyID) {
649 ParseStringConstants(Buf, EndBuf, NumEntries);
650 } else {
651 BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: "
652 << NumEntries << "\n");
653
654 for (unsigned i = 0; i < NumEntries; ++i) {
655 ParseConstantValue(Buf, EndBuf, Typ);
656 }
657 }
658 }
659
660 if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer.");
661}
662
663void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
664
665 handler->handleModuleGlobalsBegin();
666
667 // Read global variables...
668 unsigned VarType = read_vbr_uint(Buf, End);
669 while (VarType != Type::VoidTyID) { // List is terminated by Void
670 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
671 // Linkage, bit4+ = slot#
672 unsigned SlotNo = VarType >> 5;
673 unsigned LinkageID = (VarType >> 2) & 7;
674 bool isConstant = VarType & 1;
675 bool hasInitializer = VarType & 2;
676 GlobalValue::LinkageTypes Linkage;
677
678 switch (LinkageID) {
679 case 0: Linkage = GlobalValue::ExternalLinkage; break;
680 case 1: Linkage = GlobalValue::WeakLinkage; break;
681 case 2: Linkage = GlobalValue::AppendingLinkage; break;
682 case 3: Linkage = GlobalValue::InternalLinkage; break;
683 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
684 default:
685 PARSE_ERROR("Unknown linkage type: " << LinkageID);
686 Linkage = GlobalValue::InternalLinkage;
687 break;
688 }
689
690 const Type *Ty = getType(SlotNo);
691 if ( !Ty ) {
692 PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
693 }
694
695 if ( !isa<PointerType>(Ty)) {
696 PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
697 }
698
699 const Type *ElTy = cast<PointerType>(Ty)->getElementType();
700
701 // Create the global variable...
Reid Spencer5e8868d2004-06-08 05:54:47 +0000702 if (hasInitializer) {
Reid Spencerdac69c82004-06-07 17:53:43 +0000703 unsigned initSlot = read_vbr_uint(Buf,End);
704 handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
Reid Spencer5e8868d2004-06-08 05:54:47 +0000705 } else
706 handler->handleGlobalVariable( ElTy, isConstant, Linkage );
Reid Spencerdac69c82004-06-07 17:53:43 +0000707
708 // Get next item
709 VarType = read_vbr_uint(Buf, End);
710 }
711
712 // Read the function objects for all of the functions that are coming
713 unsigned FnSignature = read_vbr_uint(Buf, End);
714 while (FnSignature != Type::VoidTyID) { // List is terminated by Void
715 const Type *Ty = getType(FnSignature);
716 if (!isa<PointerType>(Ty) ||
717 !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
718 PARSE_ERROR( "Function not a pointer to function type! Ty = " +
719 Ty->getDescription());
720 // FIXME: what should Ty be if handler continues?
721 }
722
723 // We create functions by passing the underlying FunctionType to create...
724 Ty = cast<PointerType>(Ty)->getElementType();
725
726 // Save this for later so we know type of lazily instantiated functions
727 FunctionSignatureList.push_back(Ty);
728
729 handler->handleFunctionDeclaration(Ty);
730
731 // Get Next function signature
732 FnSignature = read_vbr_uint(Buf, End);
733 }
734
735 if (hasInconsistentModuleGlobalInfo)
736 align32(Buf, End);
737
738 // This is for future proofing... in the future extra fields may be added that
739 // we don't understand, so we transparently ignore them.
740 //
741 Buf = End;
742
743 handler->handleModuleGlobalsEnd();
744}
745
746void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
747 unsigned Version = read_vbr_uint(Buf, EndBuf);
748
749 // Unpack version number: low four bits are for flags, top bits = version
750 Module::Endianness Endianness;
751 Module::PointerSize PointerSize;
752 Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
753 PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
754
755 bool hasNoEndianness = Version & 4;
756 bool hasNoPointerSize = Version & 8;
757
758 RevisionNum = Version >> 4;
759
760 // Default values for the current bytecode version
761 hasInconsistentModuleGlobalInfo = false;
762 hasExplicitPrimitiveZeros = false;
763 hasRestrictedGEPTypes = false;
764
765 switch (RevisionNum) {
766 case 0: // LLVM 1.0, 1.1 release version
767 // Base LLVM 1.0 bytecode format.
768 hasInconsistentModuleGlobalInfo = true;
769 hasExplicitPrimitiveZeros = true;
770 // FALL THROUGH
771 case 1: // LLVM 1.2 release version
772 // LLVM 1.2 added explicit support for emitting strings efficiently.
773
774 // Also, it fixed the problem where the size of the ModuleGlobalInfo block
775 // included the size for the alignment at the end, where the rest of the
776 // blocks did not.
777
778 // LLVM 1.2 and before required that GEP indices be ubyte constants for
779 // structures and longs for sequential types.
780 hasRestrictedGEPTypes = true;
781
782 // FALL THROUGH
783 case 2: // LLVM 1.3 release version
784 break;
785
786 default:
787 PARSE_ERROR("Unknown bytecode version number: " << RevisionNum);
788 }
789
790 if (hasNoEndianness) Endianness = Module::AnyEndianness;
791 if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
792
793 handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
794}
795
796void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
797 unsigned Type, Size;
798 readBlock(Buf, EndBuf, Type, Size);
799 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
800 // Hrm, not a class?
801 PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
802 ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf)));
803
804 // Read into instance variables...
805 ParseVersionInfo(Buf, EndBuf);
806 align32(Buf, EndBuf);
807
808 bool SeenModuleGlobalInfo = false;
809 bool SeenGlobalTypePlane = false;
810 while (Buf < EndBuf) {
811 BufPtr OldBuf = Buf;
812 readBlock(Buf, EndBuf, Type, Size);
813
814 switch (Type) {
815
816 case BytecodeFormat::GlobalTypePlane:
817 if ( SeenGlobalTypePlane )
818 PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
819
820 ParseGlobalTypes(Buf, Buf+Size);
821 SeenGlobalTypePlane = true;
822 break;
823
824 case BytecodeFormat::ModuleGlobalInfo:
825 if ( SeenModuleGlobalInfo )
826 PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
827 ParseModuleGlobalInfo(Buf, Buf+Size);
828 SeenModuleGlobalInfo = true;
829 break;
830
831 case BytecodeFormat::ConstantPool:
832 ParseConstantPool(Buf, Buf+Size, ModuleTypes);
833 break;
834
835 case BytecodeFormat::Function:
836 ParseFunctionLazily(Buf, Buf+Size);
837 break;
838
839 case BytecodeFormat::SymbolTable:
840 ParseSymbolTable(Buf, Buf+Size );
841 break;
842
843 default:
844 Buf += Size;
845 if (OldBuf > Buf)
846 {
847 PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" );
848 }
849 break;
850 }
851 align32(Buf, EndBuf);
852 }
853}
854
855void AbstractBytecodeParser::ParseBytecode(
856 BufPtr Buf, unsigned Length,
857 const std::string &ModuleID) {
858
859 handler->handleStart();
860 unsigned char *EndBuf = (unsigned char*)(Buf + Length);
861
862 // Read and check signature...
863 unsigned Sig = read(Buf, EndBuf);
864 if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
865 PARSE_ERROR("Invalid bytecode signature: " << Sig);
866 }
867
868 handler->handleModuleBegin(ModuleID);
869
870 this->ParseModule(Buf, EndBuf);
871
872 handler->handleModuleEnd(ModuleID);
873
874 handler->handleFinish();
875}
876
877// vim: sw=2