blob: 743db6c31530fbb2eee0919ac6c1d35e1ade5df9 [file] [log] [blame]
//===- Reader.cpp - Code to read bytecode files ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This library implements the functionality defined in llvm/Bytecode/Reader.h
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Allow passing in an option to ignore the symbol table
//
//===----------------------------------------------------------------------===//
18
19#include "AnalyzerInternals.h"
Reid Spencer926572c2004-06-09 06:14:52 +000020#include "ReaderPrimitives.h"
Reid Spencerdac69c82004-06-07 17:53:43 +000021#include "llvm/Module.h"
22#include "llvm/Bytecode/Format.h"
23#include "Support/StringExtras.h"
24#include <iostream>
25#include <sstream>
26
27using namespace llvm;
28
// Enable to trace to figure out what is going on when parsing fails
//#define TRACE_LEVEL 10
//#define DEBUG_OUTPUT

// BCR_TRACE(n, X) - Bytecode reader tracing.  When TRACE_LEVEL is defined to a
// non-zero value, stream X to std::cerr, indented by 2*n spaces, for every
// trace point whose level n is below TRACE_LEVEL.  Otherwise the macro does
// nothing and X is never evaluated.
//
// Both forms expand to a single do/while(0) statement, so that
//   if (c) BCR_TRACE(1, "x"); else ...
// keeps the 'else' attached to the caller's 'if' rather than to the macro's.
#if TRACE_LEVEL
#define BCR_TRACE(n, X)                                       \
  do {                                                        \
    if ((n) < TRACE_LEVEL)                                    \
      std::cerr << std::string((n)*2, ' ') << X;              \
  } while (0)
#else
#define BCR_TRACE(n, X) do { } while (0)
#endif
39
// PARSE_ERROR(inserters) - Report a parse error.  The stream inserters are
// formatted into a message which is passed to handler->handleError().  If the
// handler returns false the message is thrown as a std::string; if it returns
// true, execution continues after the macro, so callers must cope with that.
//
// A variable named 'handler' must be in scope at the point of use.  The body is
// wrapped in do/while(0) so the macro plus its trailing semicolon forms exactly
// one statement and can be used safely in an unbraced if/else.
#define PARSE_ERROR(inserters)                                \
  do {                                                        \
    std::ostringstream errormsg;                              \
    errormsg << inserters;                                    \
    if ( ! handler->handleError( errormsg.str() ) )           \
      throw std::string(errormsg.str());                      \
  } while (0)
47
Reid Spencer926572c2004-06-09 06:14:52 +000048
49inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf,
50 const unsigned char *EndBuf,
51 unsigned &Type, unsigned &Size)
52{
53 Type = read(Buf, EndBuf);
54 Size = read(Buf, EndBuf);
55}
56
Reid Spencerdac69c82004-06-07 17:53:43 +000057const Type *AbstractBytecodeParser::getType(unsigned ID) {
58 //cerr << "Looking up Type ID: " << ID << "\n";
59
60 if (ID < Type::FirstDerivedTyID)
61 if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID))
62 return T; // Asked for a primitive type...
63
64 // Otherwise, derived types need offset...
65 ID -= Type::FirstDerivedTyID;
66
67 if (!CompactionTypeTable.empty()) {
68 if (ID >= CompactionTypeTable.size())
69 PARSE_ERROR("Type ID out of range for compaction table!");
70 return CompactionTypeTable[ID];
71 }
72
73 // Is it a module-level type?
74 if (ID < ModuleTypes.size())
75 return ModuleTypes[ID].get();
76
77 // Nope, is it a function-level type?
78 ID -= ModuleTypes.size();
79 if (ID < FunctionTypes.size())
80 return FunctionTypes[ID].get();
81
82 PARSE_ERROR("Illegal type reference!");
83 return Type::VoidTy;
84}
85
86bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf,
87 std::vector<unsigned> &Operands) {
88 Operands.clear();
89 unsigned iType = 0;
90 unsigned Opcode = 0;
91 unsigned Op = read(Buf, EndBuf);
92
93 // bits Instruction format: Common to all formats
94 // --------------------------
95 // 01-00: Opcode type, fixed to 1.
96 // 07-02: Opcode
97 Opcode = (Op >> 2) & 63;
98 Operands.resize((Op >> 0) & 03);
99
100 switch (Operands.size()) {
101 case 1:
102 // bits Instruction format:
103 // --------------------------
104 // 19-08: Resulting type plane
105 // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
106 //
107 iType = (Op >> 8) & 4095;
108 Operands[0] = (Op >> 20) & 4095;
109 if (Operands[0] == 4095) // Handle special encoding for 0 operands...
110 Operands.resize(0);
111 break;
112 case 2:
113 // bits Instruction format:
114 // --------------------------
115 // 15-08: Resulting type plane
116 // 23-16: Operand #1
117 // 31-24: Operand #2
118 //
119 iType = (Op >> 8) & 255;
120 Operands[0] = (Op >> 16) & 255;
121 Operands[1] = (Op >> 24) & 255;
122 break;
123 case 3:
124 // bits Instruction format:
125 // --------------------------
126 // 13-08: Resulting type plane
127 // 19-14: Operand #1
128 // 25-20: Operand #2
129 // 31-26: Operand #3
130 //
131 iType = (Op >> 8) & 63;
132 Operands[0] = (Op >> 14) & 63;
133 Operands[1] = (Op >> 20) & 63;
134 Operands[2] = (Op >> 26) & 63;
135 break;
136 case 0:
137 Buf -= 4; // Hrm, try this again...
138 Opcode = read_vbr_uint(Buf, EndBuf);
139 Opcode >>= 2;
140 iType = read_vbr_uint(Buf, EndBuf);
141
142 unsigned NumOperands = read_vbr_uint(Buf, EndBuf);
143 Operands.resize(NumOperands);
144
145 if (NumOperands == 0)
146 PARSE_ERROR("Zero-argument instruction found; this is invalid.");
147
148 for (unsigned i = 0; i != NumOperands; ++i)
149 Operands[i] = read_vbr_uint(Buf, EndBuf);
150 align32(Buf, EndBuf);
151 break;
152 }
153
154 return handler->handleInstruction(Opcode, getType(iType), Operands);
155}
156
157/// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one
158/// basicblock at a time. This method reads in one of the basicblock packets.
159void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf,
160 BufPtr EndBuf,
161 unsigned BlockNo) {
162 handler->handleBasicBlockBegin( BlockNo );
163
164 std::vector<unsigned> Args;
165 bool is_terminating = false;
166 while (Buf < EndBuf)
167 is_terminating = ParseInstruction(Buf, EndBuf, Args);
168
169 if ( ! is_terminating )
170 PARSE_ERROR(
171 "Failed to recognize instruction as terminating at end of block");
172
173 handler->handleBasicBlockEnd( BlockNo );
174}
175
176
177/// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the
178/// body of a function. In post 1.0 bytecode files, we no longer emit basic
179/// block individually, in order to avoid per-basic-block overhead.
Reid Spencer5e8868d2004-06-08 05:54:47 +0000180unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf,
181 BufPtr EndBuf) {
Reid Spencerdac69c82004-06-07 17:53:43 +0000182 unsigned BlockNo = 0;
183 std::vector<unsigned> Args;
184
185 while (Buf < EndBuf) {
186 handler->handleBasicBlockBegin( BlockNo );
187
188 // Read instructions into this basic block until we get to a terminator
189 bool is_terminating = false;
190 while (Buf < EndBuf && !is_terminating )
191 is_terminating = ParseInstruction(Buf, EndBuf, Args ) ;
192
193 if (!is_terminating)
194 PARSE_ERROR( "Non-terminated basic block found!");
195
196 handler->handleBasicBlockEnd( BlockNo );
197 ++BlockNo;
198 }
199 return BlockNo;
200}
201
202void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) {
203 handler->handleSymbolTableBegin();
204
205 while (Buf < EndBuf) {
206 // Symtab block header: [num entries][type id number]
207 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
208 unsigned Typ = read_vbr_uint(Buf, EndBuf);
209 const Type *Ty = getType(Typ);
210
211 handler->handleSymbolTablePlane( Typ, NumEntries, Ty );
212
213 for (unsigned i = 0; i != NumEntries; ++i) {
214 // Symtab entry: [def slot #][name]
215 unsigned slot = read_vbr_uint(Buf, EndBuf);
216 std::string Name = read_str(Buf, EndBuf);
217
218 if (Typ == Type::TypeTyID)
219 handler->handleSymbolTableType( i, slot, Name );
220 else
221 handler->handleSymbolTableValue( i, slot, Name );
222 }
223 }
224
225 if (Buf > EndBuf)
226 PARSE_ERROR("Tried to read past end of buffer while reading symbol table.");
227
228 handler->handleSymbolTableEnd();
229}
230
231void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) {
232 if (FunctionSignatureList.empty())
233 throw std::string("FunctionSignatureList empty!");
234
235 const Type *FType = FunctionSignatureList.back();
236 FunctionSignatureList.pop_back();
237
238 // Save the information for future reading of the function
239 LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf);
240 // Pretend we've `parsed' this function
241 Buf = EndBuf;
242}
243
244void AbstractBytecodeParser::ParseNextFunction(Type* FType) {
245 // Find {start, end} pointers and slot in the map. If not there, we're done.
246 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(FType);
247
248 // Make sure we found it
249 if ( Fi == LazyFunctionLoadMap.end() ) {
250 PARSE_ERROR("Unrecognized function of type " << FType->getDescription());
251 return;
252 }
253
254 BufPtr Buf = Fi->second.Buf;
255 BufPtr EndBuf = Fi->second.EndBuf;
256 assert(Fi->first == FType);
257
258 LazyFunctionLoadMap.erase(Fi);
259
260 this->ParseFunctionBody( FType, Buf, EndBuf );
261}
262
263void AbstractBytecodeParser::ParseFunctionBody(const Type* FType,
264 BufPtr &Buf, BufPtr EndBuf ) {
265
266 GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage;
267
268 unsigned LinkageType = read_vbr_uint(Buf, EndBuf);
269 switch (LinkageType) {
270 case 0: Linkage = GlobalValue::ExternalLinkage; break;
271 case 1: Linkage = GlobalValue::WeakLinkage; break;
272 case 2: Linkage = GlobalValue::AppendingLinkage; break;
273 case 3: Linkage = GlobalValue::InternalLinkage; break;
274 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
275 default:
276 PARSE_ERROR("Invalid linkage type for Function.");
277 Linkage = GlobalValue::InternalLinkage;
278 break;
279 }
280
281 handler->handleFunctionBegin(FType,Linkage);
282
283 // Keep track of how many basic blocks we have read in...
284 unsigned BlockNum = 0;
285 bool InsertedArguments = false;
286
287 while (Buf < EndBuf) {
288 unsigned Type, Size;
289 BufPtr OldBuf = Buf;
290 readBlock(Buf, EndBuf, Type, Size);
291
292 switch (Type) {
293 case BytecodeFormat::ConstantPool:
294 ParseConstantPool(Buf, Buf+Size, FunctionTypes );
295 break;
296
297 case BytecodeFormat::CompactionTable:
298 ParseCompactionTable(Buf, Buf+Size);
299 break;
300
301 case BytecodeFormat::BasicBlock:
302 ParseBasicBlock(Buf, Buf+Size, BlockNum++);
303 break;
304
305 case BytecodeFormat::InstructionList:
306 if (BlockNum)
307 PARSE_ERROR("InstructionList must come before basic blocks!");
308 BlockNum = ParseInstructionList(Buf, Buf+Size);
309 break;
310
311 case BytecodeFormat::SymbolTable:
312 ParseSymbolTable(Buf, Buf+Size );
313 break;
314
315 default:
316 Buf += Size;
317 if (OldBuf > Buf)
318 PARSE_ERROR("Wrapped around reading bytecode");
319 break;
320 }
321
322 // Malformed bc file if read past end of block.
323 align32(Buf, EndBuf);
324 }
325
326 handler->handleFunctionEnd(FType);
327
328 // Clear out function-level types...
329 FunctionTypes.clear();
330 CompactionTypeTable.clear();
331}
332
333void AbstractBytecodeParser::ParseAllFunctionBodies() {
334 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin();
335 LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end();
336
337 while ( Fi != Fe ) {
338 const Type* FType = Fi->first;
339 this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf);
340 }
341}
342
343void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) {
344
345 handler->handleCompactionTableBegin();
346
347 while (Buf != End) {
348 unsigned NumEntries = read_vbr_uint(Buf, End);
349 unsigned Ty;
350
351 if ((NumEntries & 3) == 3) {
352 NumEntries >>= 2;
353 Ty = read_vbr_uint(Buf, End);
354 } else {
355 Ty = NumEntries >> 2;
356 NumEntries &= 3;
357 }
358
359 handler->handleCompactionTablePlane( Ty, NumEntries );
360
361 if (Ty == Type::TypeTyID) {
362 for (unsigned i = 0; i != NumEntries; ++i) {
363 unsigned TypeSlot = read_vbr_uint(Buf,End);
364 const Type *Typ = getGlobalTableType(TypeSlot);
365 handler->handleCompactionTableType( i, TypeSlot, Typ );
366 }
367 } else {
368 const Type *Typ = getType(Ty);
369 // Push the implicit zero
370 for (unsigned i = 0; i != NumEntries; ++i) {
371 unsigned ValSlot = read_vbr_uint(Buf, End);
372 handler->handleCompactionTableValue( i, ValSlot, Typ );
373 }
374 }
375 }
376 handler->handleCompactionTableEnd();
377}
378
379const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf,
380 const unsigned char *EndBuf) {
381 unsigned PrimType = read_vbr_uint(Buf, EndBuf);
382
383 const Type *Val = 0;
384 if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType)))
385 return Val;
386
387 switch (PrimType) {
388 case Type::FunctionTyID: {
389 const Type *RetType = getType(read_vbr_uint(Buf, EndBuf));
390
391 unsigned NumParams = read_vbr_uint(Buf, EndBuf);
392
393 std::vector<const Type*> Params;
394 while (NumParams--)
395 Params.push_back(getType(read_vbr_uint(Buf, EndBuf)));
396
397 bool isVarArg = Params.size() && Params.back() == Type::VoidTy;
398 if (isVarArg) Params.pop_back();
399
400 Type* result = FunctionType::get(RetType, Params, isVarArg);
401 handler->handleType( result );
402 return result;
403 }
404 case Type::ArrayTyID: {
405 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
406 const Type *ElementType = getType(ElTyp);
407
408 unsigned NumElements = read_vbr_uint(Buf, EndBuf);
409
410 BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size="
411 << NumElements << "\n");
412 Type* result = ArrayType::get(ElementType, NumElements);
413 handler->handleType( result );
414 return result;
415 }
416 case Type::StructTyID: {
417 std::vector<const Type*> Elements;
418 unsigned Typ = read_vbr_uint(Buf, EndBuf);
419 while (Typ) { // List is terminated by void/0 typeid
420 Elements.push_back(getType(Typ));
421 Typ = read_vbr_uint(Buf, EndBuf);
422 }
423
424 Type* result = StructType::get(Elements);
425 handler->handleType( result );
426 return result;
427 }
428 case Type::PointerTyID: {
429 unsigned ElTyp = read_vbr_uint(Buf, EndBuf);
430 BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n");
431 Type* result = PointerType::get(getType(ElTyp));
432 handler->handleType( result );
433 return result;
434 }
435
436 case Type::OpaqueTyID: {
437 Type* result = OpaqueType::get();
438 handler->handleType( result );
439 return result;
440 }
441
442 default:
443 PARSE_ERROR("Don't know how to deserialize primitive type" << PrimType << "\n");
444 return Val;
445 }
446}
447
448// ParseTypeConstants - We have to use this weird code to handle recursive
449// types. We know that recursive types will only reference the current slab of
450// values in the type plane, but they can forward reference types before they
451// have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might
452// be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix
453// this ugly problem, we pessimistically insert an opaque type for each type we
454// are about to read. This means that forward references will resolve to
455// something and when we reread the type later, we can replace the opaque type
456// with a new resolved concrete type.
457//
458void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf,
459 const unsigned char *EndBuf,
460 TypeListTy &Tab,
461 unsigned NumEntries) {
462 assert(Tab.size() == 0 && "should not have read type constants in before!");
463
464 // Insert a bunch of opaque types to be resolved later...
465 Tab.reserve(NumEntries);
466 for (unsigned i = 0; i != NumEntries; ++i)
467 Tab.push_back(OpaqueType::get());
468
469 // Loop through reading all of the types. Forward types will make use of the
470 // opaque types just inserted.
471 //
472 for (unsigned i = 0; i != NumEntries; ++i) {
473 const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get();
474 if (NewTy == 0) throw std::string("Couldn't parse type!");
475 BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy <<
476 "' Replacing: " << OldTy << "\n");
477
478 // Don't insertValue the new type... instead we want to replace the opaque
479 // type with the new concrete value...
480 //
481
482 // Refine the abstract type to the new type. This causes all uses of the
483 // abstract type to use NewTy. This also will cause the opaque type to be
484 // deleted...
485 //
486 cast<DerivedType>(const_cast<Type*>(OldTy))->refineAbstractTypeTo(NewTy);
487
488 // This should have replace the old opaque type with the new type in the
489 // value table... or with a preexisting type that was already in the system
490 assert(Tab[i] != OldTy && "refineAbstractType didn't work!");
491 }
492
493 BCR_TRACE(5, "Resulting types:\n");
494 for (unsigned i = 0; i < NumEntries; ++i) {
495 BCR_TRACE(5, (void*)Tab[i].get() << " - " << Tab[i].get() << "\n");
496 }
497}
498
499
500void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf,
501 const unsigned char *EndBuf,
502 unsigned TypeID) {
503
504 // We must check for a ConstantExpr before switching by type because
505 // a ConstantExpr can be of any type, and has no explicit value.
506 //
507 // 0 if not expr; numArgs if is expr
508 unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf);
509
510 if (isExprNumArgs) {
511 unsigned Opcode = read_vbr_uint(Buf, EndBuf);
512 const Type* Typ = getType(TypeID);
513
514 // FIXME: Encoding of constant exprs could be much more compact!
515 std::vector<std::pair<const Type*,unsigned> > ArgVec;
516 ArgVec.reserve(isExprNumArgs);
517
518 // Read the slot number and types of each of the arguments
519 for (unsigned i = 0; i != isExprNumArgs; ++i) {
520 unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf);
521 unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf);
522 BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot)
523 << "' slot: " << ArgValSlot << "\n");
524
525 // Get the arg value from its slot if it exists, otherwise a placeholder
526 ArgVec.push_back(std::make_pair(getType(ArgTypeSlot), ArgValSlot));
527 }
528
529 handler->handleConstantExpression( Opcode, Typ, ArgVec );
530 return;
531 }
532
533 // Ok, not an ConstantExpr. We now know how to read the given type...
534 const Type *Ty = getType(TypeID);
535 switch (Ty->getPrimitiveID()) {
536 case Type::BoolTyID: {
537 unsigned Val = read_vbr_uint(Buf, EndBuf);
538 if (Val != 0 && Val != 1)
539 PARSE_ERROR("Invalid boolean value read.");
540
541 handler->handleConstantValue( ConstantBool::get(Val == 1));
542 break;
543 }
544
545 case Type::UByteTyID: // Unsigned integer types...
546 case Type::UShortTyID:
547 case Type::UIntTyID: {
548 unsigned Val = read_vbr_uint(Buf, EndBuf);
549 if (!ConstantUInt::isValueValidForType(Ty, Val))
550 throw std::string("Invalid unsigned byte/short/int read.");
551 handler->handleConstantValue( ConstantUInt::get(Ty, Val) );
552 break;
553 }
554
555 case Type::ULongTyID: {
556 handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) );
557 break;
558 }
559
560 case Type::SByteTyID: // Signed integer types...
561 case Type::ShortTyID:
562 case Type::IntTyID: {
563 case Type::LongTyID:
564 int64_t Val = read_vbr_int64(Buf, EndBuf);
565 if (!ConstantSInt::isValueValidForType(Ty, Val))
566 throw std::string("Invalid signed byte/short/int/long read.");
567 handler->handleConstantValue( ConstantSInt::get(Ty, Val) );
568 break;
569 }
570
571 case Type::FloatTyID: {
572 float F;
573 input_data(Buf, EndBuf, &F, &F+1);
574 handler->handleConstantValue( ConstantFP::get(Ty, F) );
575 break;
576 }
577
578 case Type::DoubleTyID: {
579 double Val;
580 input_data(Buf, EndBuf, &Val, &Val+1);
581 handler->handleConstantValue( ConstantFP::get(Ty, Val) );
582 break;
583 }
584
585 case Type::TypeTyID:
586 PARSE_ERROR("Type constants shouldn't live in constant table!");
587 break;
588
589 case Type::ArrayTyID: {
590 const ArrayType *AT = cast<ArrayType>(Ty);
591 unsigned NumElements = AT->getNumElements();
592 std::vector<unsigned> Elements;
593 Elements.reserve(NumElements);
594 while (NumElements--) // Read all of the elements of the constant.
595 Elements.push_back(read_vbr_uint(Buf, EndBuf));
596
597 handler->handleConstantArray( AT, Elements );
598 break;
599 }
600
601 case Type::StructTyID: {
602 const StructType *ST = cast<StructType>(Ty);
603 std::vector<unsigned> Elements;
604 Elements.reserve(ST->getNumElements());
605 for (unsigned i = 0; i != ST->getNumElements(); ++i)
606 Elements.push_back(read_vbr_uint(Buf, EndBuf));
607
608 handler->handleConstantStruct( ST, Elements );
609 }
610
611 case Type::PointerTyID: { // ConstantPointerRef value...
612 const PointerType *PT = cast<PointerType>(Ty);
613 unsigned Slot = read_vbr_uint(Buf, EndBuf);
614 handler->handleConstantPointer( PT, Slot );
615 }
616
617 default:
618 PARSE_ERROR("Don't know how to deserialize constant value of type '"+
619 Ty->getDescription());
620 }
621}
622
623void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf,
624 const unsigned char *EndBuf) {
625 ParseConstantPool(Buf, EndBuf, ModuleTypes);
626}
627
628void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf,
629 const unsigned char *EndBuf,
630 unsigned NumEntries ){
631 for (; NumEntries; --NumEntries) {
632 unsigned Typ = read_vbr_uint(Buf, EndBuf);
633 const Type *Ty = getType(Typ);
634 if (!isa<ArrayType>(Ty))
635 throw std::string("String constant data invalid!");
636
637 const ArrayType *ATy = cast<ArrayType>(Ty);
638 if (ATy->getElementType() != Type::SByteTy &&
639 ATy->getElementType() != Type::UByteTy)
640 throw std::string("String constant data invalid!");
641
642 // Read character data. The type tells us how long the string is.
643 char Data[ATy->getNumElements()];
644 input_data(Buf, EndBuf, Data, Data+ATy->getNumElements());
645
646 std::vector<Constant*> Elements(ATy->getNumElements());
647 if (ATy->getElementType() == Type::SByteTy)
648 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
649 Elements[i] = ConstantSInt::get(Type::SByteTy, (signed char)Data[i]);
650 else
651 for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
652 Elements[i] = ConstantUInt::get(Type::UByteTy, (unsigned char)Data[i]);
653
654 // Create the constant, inserting it as needed.
655 ConstantArray *C = cast<ConstantArray>( ConstantArray::get(ATy, Elements) );
656 handler->handleConstantString( C );
657 }
658}
659
660
661void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf,
662 const unsigned char *EndBuf,
663 TypeListTy &TypeTab) {
664 while (Buf < EndBuf) {
665 unsigned NumEntries = read_vbr_uint(Buf, EndBuf);
666 unsigned Typ = read_vbr_uint(Buf, EndBuf);
667 if (Typ == Type::TypeTyID) {
668 ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries);
669 } else if (Typ == Type::VoidTyID) {
670 ParseStringConstants(Buf, EndBuf, NumEntries);
671 } else {
672 BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: "
673 << NumEntries << "\n");
674
675 for (unsigned i = 0; i < NumEntries; ++i) {
676 ParseConstantValue(Buf, EndBuf, Typ);
677 }
678 }
679 }
680
681 if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer.");
682}
683
684void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) {
685
686 handler->handleModuleGlobalsBegin();
687
688 // Read global variables...
689 unsigned VarType = read_vbr_uint(Buf, End);
690 while (VarType != Type::VoidTyID) { // List is terminated by Void
691 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
692 // Linkage, bit4+ = slot#
693 unsigned SlotNo = VarType >> 5;
694 unsigned LinkageID = (VarType >> 2) & 7;
695 bool isConstant = VarType & 1;
696 bool hasInitializer = VarType & 2;
697 GlobalValue::LinkageTypes Linkage;
698
699 switch (LinkageID) {
700 case 0: Linkage = GlobalValue::ExternalLinkage; break;
701 case 1: Linkage = GlobalValue::WeakLinkage; break;
702 case 2: Linkage = GlobalValue::AppendingLinkage; break;
703 case 3: Linkage = GlobalValue::InternalLinkage; break;
704 case 4: Linkage = GlobalValue::LinkOnceLinkage; break;
705 default:
706 PARSE_ERROR("Unknown linkage type: " << LinkageID);
707 Linkage = GlobalValue::InternalLinkage;
708 break;
709 }
710
711 const Type *Ty = getType(SlotNo);
712 if ( !Ty ) {
713 PARSE_ERROR("Global has no type! SlotNo=" << SlotNo);
714 }
715
716 if ( !isa<PointerType>(Ty)) {
717 PARSE_ERROR("Global not a pointer type! Ty= " << Ty->getDescription());
718 }
719
720 const Type *ElTy = cast<PointerType>(Ty)->getElementType();
721
722 // Create the global variable...
Reid Spencer5e8868d2004-06-08 05:54:47 +0000723 if (hasInitializer) {
Reid Spencerdac69c82004-06-07 17:53:43 +0000724 unsigned initSlot = read_vbr_uint(Buf,End);
725 handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot );
Reid Spencer5e8868d2004-06-08 05:54:47 +0000726 } else
727 handler->handleGlobalVariable( ElTy, isConstant, Linkage );
Reid Spencerdac69c82004-06-07 17:53:43 +0000728
729 // Get next item
730 VarType = read_vbr_uint(Buf, End);
731 }
732
733 // Read the function objects for all of the functions that are coming
734 unsigned FnSignature = read_vbr_uint(Buf, End);
735 while (FnSignature != Type::VoidTyID) { // List is terminated by Void
736 const Type *Ty = getType(FnSignature);
737 if (!isa<PointerType>(Ty) ||
738 !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
739 PARSE_ERROR( "Function not a pointer to function type! Ty = " +
740 Ty->getDescription());
741 // FIXME: what should Ty be if handler continues?
742 }
743
744 // We create functions by passing the underlying FunctionType to create...
745 Ty = cast<PointerType>(Ty)->getElementType();
746
747 // Save this for later so we know type of lazily instantiated functions
748 FunctionSignatureList.push_back(Ty);
749
750 handler->handleFunctionDeclaration(Ty);
751
752 // Get Next function signature
753 FnSignature = read_vbr_uint(Buf, End);
754 }
755
756 if (hasInconsistentModuleGlobalInfo)
757 align32(Buf, End);
758
759 // This is for future proofing... in the future extra fields may be added that
760 // we don't understand, so we transparently ignore them.
761 //
762 Buf = End;
763
764 handler->handleModuleGlobalsEnd();
765}
766
767void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) {
768 unsigned Version = read_vbr_uint(Buf, EndBuf);
769
770 // Unpack version number: low four bits are for flags, top bits = version
771 Module::Endianness Endianness;
772 Module::PointerSize PointerSize;
773 Endianness = (Version & 1) ? Module::BigEndian : Module::LittleEndian;
774 PointerSize = (Version & 2) ? Module::Pointer64 : Module::Pointer32;
775
776 bool hasNoEndianness = Version & 4;
777 bool hasNoPointerSize = Version & 8;
778
779 RevisionNum = Version >> 4;
780
781 // Default values for the current bytecode version
782 hasInconsistentModuleGlobalInfo = false;
783 hasExplicitPrimitiveZeros = false;
784 hasRestrictedGEPTypes = false;
785
786 switch (RevisionNum) {
787 case 0: // LLVM 1.0, 1.1 release version
788 // Base LLVM 1.0 bytecode format.
789 hasInconsistentModuleGlobalInfo = true;
790 hasExplicitPrimitiveZeros = true;
791 // FALL THROUGH
792 case 1: // LLVM 1.2 release version
793 // LLVM 1.2 added explicit support for emitting strings efficiently.
794
795 // Also, it fixed the problem where the size of the ModuleGlobalInfo block
796 // included the size for the alignment at the end, where the rest of the
797 // blocks did not.
798
799 // LLVM 1.2 and before required that GEP indices be ubyte constants for
800 // structures and longs for sequential types.
801 hasRestrictedGEPTypes = true;
802
803 // FALL THROUGH
804 case 2: // LLVM 1.3 release version
805 break;
806
807 default:
808 PARSE_ERROR("Unknown bytecode version number: " << RevisionNum);
809 }
810
811 if (hasNoEndianness) Endianness = Module::AnyEndianness;
812 if (hasNoPointerSize) PointerSize = Module::AnyPointerSize;
813
814 handler->handleVersionInfo(RevisionNum, Endianness, PointerSize );
815}
816
817void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) {
818 unsigned Type, Size;
819 readBlock(Buf, EndBuf, Type, Size);
820 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
821 // Hrm, not a class?
822 PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) <<
823 ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf)));
824
825 // Read into instance variables...
826 ParseVersionInfo(Buf, EndBuf);
827 align32(Buf, EndBuf);
828
829 bool SeenModuleGlobalInfo = false;
830 bool SeenGlobalTypePlane = false;
831 while (Buf < EndBuf) {
832 BufPtr OldBuf = Buf;
833 readBlock(Buf, EndBuf, Type, Size);
834
835 switch (Type) {
836
837 case BytecodeFormat::GlobalTypePlane:
838 if ( SeenGlobalTypePlane )
839 PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!");
840
841 ParseGlobalTypes(Buf, Buf+Size);
842 SeenGlobalTypePlane = true;
843 break;
844
845 case BytecodeFormat::ModuleGlobalInfo:
846 if ( SeenModuleGlobalInfo )
847 PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!");
848 ParseModuleGlobalInfo(Buf, Buf+Size);
849 SeenModuleGlobalInfo = true;
850 break;
851
852 case BytecodeFormat::ConstantPool:
853 ParseConstantPool(Buf, Buf+Size, ModuleTypes);
854 break;
855
856 case BytecodeFormat::Function:
857 ParseFunctionLazily(Buf, Buf+Size);
858 break;
859
860 case BytecodeFormat::SymbolTable:
861 ParseSymbolTable(Buf, Buf+Size );
862 break;
863
864 default:
865 Buf += Size;
866 if (OldBuf > Buf)
867 {
868 PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" );
869 }
870 break;
871 }
872 align32(Buf, EndBuf);
873 }
874}
875
876void AbstractBytecodeParser::ParseBytecode(
877 BufPtr Buf, unsigned Length,
878 const std::string &ModuleID) {
879
880 handler->handleStart();
881 unsigned char *EndBuf = (unsigned char*)(Buf + Length);
882
883 // Read and check signature...
884 unsigned Sig = read(Buf, EndBuf);
885 if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
886 PARSE_ERROR("Invalid bytecode signature: " << Sig);
887 }
888
889 handler->handleModuleBegin(ModuleID);
890
891 this->ParseModule(Buf, EndBuf);
892
893 handler->handleModuleEnd(ModuleID);
894
895 handler->handleFinish();
896}
897
898// vim: sw=2