blob: 08758cb5f29049ada6667df2ba7dccdbb9d5da0f [file] [log] [blame]
Reid Spencer29dba0c2004-06-08 05:51:18 +00001//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===//
Reid Spencerdac69c82004-06-07 17:53:43 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
Reid Spencer29dba0c2004-06-08 05:51:18 +000010// This header file defines the interface to the Bytecode Parser and the
11// Bytecode Handler interface that it calls.
Reid Spencerdac69c82004-06-07 17:53:43 +000012//
13//===----------------------------------------------------------------------===//
14
15#ifndef BYTECODE_PARSER_H
16#define BYTECODE_PARSER_H
17
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>
25
26namespace llvm {
27
Reid Spencer29dba0c2004-06-08 05:51:18 +000028class BytecodeHandler; ///< Forward declare the handler interface
Reid Spencerdac69c82004-06-07 17:53:43 +000029
Reid Spencer29dba0c2004-06-08 05:51:18 +000030/// This class defines the interface for parsing a buffer of bytecode. The
31/// parser itself takes no action except to call the various functions of
32/// the handler interface. The parser's sole responsibility is the correct
33/// interpretation of the bytecode buffer. The handler is responsible for
34/// instantiating and keeping track of all values. As a convenience, the parser
35/// is responsible for materializing types and will pass them through the
36/// handler interface as necessary.
37/// @see BytecodeHandler
38/// @brief Abstract Bytecode Parser interface
Reid Spencerdac69c82004-06-07 17:53:43 +000039class AbstractBytecodeParser {
Reid Spencer29dba0c2004-06-08 05:51:18 +000040
41/// @name Constructors
42/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +000043public:
Reid Spencer00c28a72004-06-10 08:09:13 +000044 AbstractBytecodeParser(
45 BytecodeHandler* h,
46 bool repAlignment = false,
47 bool repBlocks = false,
48 bool repVBR = false
49 ) {
50 handler = h;
51 reportAlignment = repAlignment;
52 reportBlocks = repBlocks;
53 reportVBR = repVBR;
54 }
55
Reid Spencerdac69c82004-06-07 17:53:43 +000056 ~AbstractBytecodeParser() { }
57
Reid Spencer29dba0c2004-06-08 05:51:18 +000058/// @}
59/// @name Types
60/// @{
61public:
62 /// @brief A convenience type for the buffer pointer
63 typedef const unsigned char* BufPtr;
64
65 /// @brief The type used for vector of potentially abstract types
66 typedef std::vector<PATypeHolder> TypeListTy;
67
68 /// @brief
69
70/// @}
71/// @name Methods
72/// @{
73public:
74
75 /// @brief Main interface to parsing a bytecode buffer.
Reid Spencerdac69c82004-06-07 17:53:43 +000076 void ParseBytecode(const unsigned char *Buf, unsigned Length,
77 const std::string &ModuleID);
78
Reid Spencer29dba0c2004-06-08 05:51:18 +000079 /// The ParseBytecode method lazily parses functions. Use this
80 /// method to cause the parser to actually parse all the function bodies
81 /// in the bytecode buffer.
82 /// @see ParseBytecode
83 /// @brief Parse all function bodies
84 void ParseAllFunctionBodies ();
Reid Spencerdac69c82004-06-07 17:53:43 +000085
Reid Spencer29dba0c2004-06-08 05:51:18 +000086 /// The Parsebytecode method lazily parses functions. Use this
87 /// method to casue the parser to parse the next function of a given
88 /// types. Note that this will remove the function from what is to be
89 /// included by ParseAllFunctionBodies.
90 /// @see ParseAllFunctionBodies
91 /// @see ParseBytecode
92 /// @brief Parse the next function of specific type
93 void ParseNextFunction (Type* FType) ;
94
95/// @}
96/// @name Parsing Units For Subclasses
97/// @{
98protected:
99 /// @brief Parse whole module scope
Reid Spencer00c28a72004-06-10 08:09:13 +0000100 void ParseModule ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000101
102 /// @brief Parse the version information block
Reid Spencer00c28a72004-06-10 08:09:13 +0000103 void ParseVersionInfo ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000104
105 /// @brief Parse the ModuleGlobalInfo block
Reid Spencer00c28a72004-06-10 08:09:13 +0000106 void ParseModuleGlobalInfo ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000107
108 /// @brief Parse a symbol table
Reid Spencer00c28a72004-06-10 08:09:13 +0000109 void ParseSymbolTable ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000110
111 /// This function parses LLVM functions lazily. It obtains the type of the
112 /// function and records where the body of the function is in the bytecode
113 /// buffer. The caller can then use the ParseNextFunction and
114 /// ParseAllFunctionBodies to get handler events for the functions.
115 /// @brief Parse functions lazily.
Reid Spencer00c28a72004-06-10 08:09:13 +0000116 void ParseFunctionLazily ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000117
118 /// @brief Parse a function body
Reid Spencer00c28a72004-06-10 08:09:13 +0000119 void ParseFunctionBody (const Type* FType);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000120
121 /// @brief Parse a compaction table
Reid Spencer00c28a72004-06-10 08:09:13 +0000122 void ParseCompactionTable ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000123
124 /// @brief Parse global types
Reid Spencer00c28a72004-06-10 08:09:13 +0000125 void ParseGlobalTypes ();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000126
127 /// @brief Parse a basic block (for LLVM 1.0 basic block blocks)
Reid Spencer00c28a72004-06-10 08:09:13 +0000128 void ParseBasicBlock (unsigned BlockNo);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000129
130 /// @brief parse an instruction list (for post LLVM 1.0 instruction lists
131 /// with blocks differentiated by terminating instructions.
Reid Spencer00c28a72004-06-10 08:09:13 +0000132 unsigned ParseInstructionList();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000133
134 /// @brief Parse an instruction.
Reid Spencer00c28a72004-06-10 08:09:13 +0000135 bool ParseInstruction (std::vector<unsigned>& Args);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000136
137 /// @brief Parse a constant pool
Reid Spencer00c28a72004-06-10 08:09:13 +0000138 void ParseConstantPool (TypeListTy& List);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000139
140 /// @brief Parse a constant value
Reid Spencer00c28a72004-06-10 08:09:13 +0000141 void ParseConstantValue (unsigned TypeID);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000142
143 /// @brief Parse a block of types.
Reid Spencer00c28a72004-06-10 08:09:13 +0000144 void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000145
146 /// @brief Parse a single type.
Reid Spencer00c28a72004-06-10 08:09:13 +0000147 const Type *ParseTypeConstant();
Reid Spencer29dba0c2004-06-08 05:51:18 +0000148
149 /// @brief Parse a string constants block
Reid Spencer00c28a72004-06-10 08:09:13 +0000150 void ParseStringConstants (unsigned NumEntries);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000151
152/// @}
153/// @name Data
154/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +0000155private:
Reid Spencer00c28a72004-06-10 08:09:13 +0000156 BufPtr MemStart; ///< Start of the memory buffer
157 BufPtr MemEnd; ///< End of the memory buffer
158 BufPtr BlockStart; ///< Start of current block being parsed
159 BufPtr BlockEnd; ///< End of current block being parsed
160 BufPtr At; ///< Where we're currently parsing at
161
162 bool reportAlignment; ///< Parser should report alignment?
163 bool reportBlocks; ///< Parser should report blocks?
164 bool reportVBR; ///< Report VBR compression events
165
Reid Spencerdac69c82004-06-07 17:53:43 +0000166 // Information about the module, extracted from the bytecode revision number.
167 unsigned char RevisionNum; // The rev # itself
168
169 // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
170
171 // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
172 // block. This was fixed to be like all other blocks in 1.2
173 bool hasInconsistentModuleGlobalInfo;
174
175 // Revision #0 also explicitly encoded zero values for primitive types like
176 // int/sbyte/etc.
177 bool hasExplicitPrimitiveZeros;
178
179 // Flags to control features specific the LLVM 1.2 and before (revision #1)
180
181 // LLVM 1.2 and earlier required that getelementptr structure indices were
182 // ubyte constants and that sequential type indices were longs.
183 bool hasRestrictedGEPTypes;
184
185
186 /// CompactionTable - If a compaction table is active in the current function,
187 /// this is the mapping that it contains.
188 std::vector<Type*> CompactionTypeTable;
189
190 // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
191 // forward references to constants. Such values may be referenced before they
192 // are defined, and if so, the temporary object that they represent is held
193 // here.
194 //
195 typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
196 ConstantRefsType ConstantFwdRefs;
197
198 // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
199 // to deal with forward references to types.
200 //
Reid Spencerdac69c82004-06-07 17:53:43 +0000201 TypeListTy ModuleTypes;
202 TypeListTy FunctionTypes;
203
204 // When the ModuleGlobalInfo section is read, we create a FunctionType object
205 // for each function in the module. When the function is loaded, this type is
206 // used to instantiate the actual function object.
Reid Spencer29dba0c2004-06-08 05:51:18 +0000207
Reid Spencerdac69c82004-06-07 17:53:43 +0000208 std::vector<const Type*> FunctionSignatureList;
209
210 // Constant values are read in after global variables. Because of this, we
211 // must defer setting the initializers on global variables until after module
212 // level constants have been read. In the mean time, this list keeps track of
213 // what we must do.
214 //
215 std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
216
Reid Spencer29dba0c2004-06-08 05:51:18 +0000217/// @}
218/// @name Implementation Details
219/// @{
220private:
221 /// This stores the parser's handler. It makes virtual function calls through
222 /// the BytecodeHandler to notify the handler of parsing events. What the
223 /// handler does with the events is completely orthogonal to the business of
224 /// parsing the bytecode.
225 /// @brief The handler of bytecode parsing events.
Reid Spencerdac69c82004-06-07 17:53:43 +0000226 BytecodeHandler* handler;
227
Reid Spencer29dba0c2004-06-08 05:51:18 +0000228 /// For lazy reading-in of functions, we need to save away several pieces of
229 /// information about each function: its begin and end pointer in the buffer
230 /// and its FunctionSlot.
231 struct LazyFunctionInfo {
232 const unsigned char *Buf, *EndBuf;
233 LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0)
234 : Buf(B), EndBuf(EB) {}
235 };
236 typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap;
237 LazyFunctionMap LazyFunctionLoadMap;
238
Reid Spencerdac69c82004-06-07 17:53:43 +0000239private:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000240
Reid Spencer00c28a72004-06-10 08:09:13 +0000241 /// Is there more to parse in the current block?
242 inline bool moreInBlock();
243
244 /// Have we read past the end of the block
245 inline void checkPastBlockEnd(const char * block_name);
246
247 /// Align to 32 bits
248 inline void align32();
249
250 /// Reader interface
251 inline unsigned read_uint();
252 inline unsigned read_vbr_uint();
253 inline uint64_t read_vbr_uint64();
254 inline int64_t read_vbr_int64();
255 inline std::string read_str();
256 inline void read_data(void *Ptr, void *End);
257
258 /// Read a block header
259 inline void readBlock(unsigned &Type, unsigned &Size);
Reid Spencer29dba0c2004-06-08 05:51:18 +0000260
Reid Spencerdac69c82004-06-07 17:53:43 +0000261 const Type *AbstractBytecodeParser::getType(unsigned ID);
262 /// getGlobalTableType - This is just like getType, but when a compaction
263 /// table is in use, it is ignored. Also, no forward references or other
264 /// fancy features are supported.
265 const Type *getGlobalTableType(unsigned Slot) {
266 if (Slot < Type::FirstDerivedTyID) {
267 const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot);
268 assert(Ty && "Not a primitive type ID?");
269 return Ty;
270 }
271 Slot -= Type::FirstDerivedTyID;
272 if (Slot >= ModuleTypes.size())
273 throw std::string("Illegal compaction table type reference!");
274 return ModuleTypes[Slot];
275 }
276
277 unsigned getGlobalTableTypeSlot(const Type *Ty) {
278 if (Ty->isPrimitiveType())
279 return Ty->getPrimitiveID();
280 TypeListTy::iterator I = find(ModuleTypes.begin(),
281 ModuleTypes.end(), Ty);
282 if (I == ModuleTypes.end())
283 throw std::string("Didn't find type in ModuleTypes.");
284 return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]);
285 }
286
Reid Spencer29dba0c2004-06-08 05:51:18 +0000287 AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
288 void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
289
290/// @}
291};
292
293/// This class provides the interface for the handling bytecode events during
294/// parsing. The methods on this interface are invoked by the
295/// AbstractBytecodeParser as it discovers the content of a bytecode stream.
296/// This class provides a a clear separation of concerns between recognizing
297/// the semantic units of a bytecode file and deciding what to do with them.
298/// The AbstractBytecodeParser recognizes the content of the bytecode file and
299/// calls the BytecodeHandler methods to determine what should be done. This
300/// arrangement allows Bytecode files to be read and handled for a number of
301/// purposes simply by creating a subclass of BytecodeHandler. None of the
302/// parsing details need to be understood, only the meaning of the calls
303/// made on this interface.
304///
305/// Another paradigm that uses this design pattern is the XML SAX Parser. The
306/// ContentHandler for SAX plays the same role as the BytecodeHandler here.
307/// @see AbstractbytecodeParser
308/// @brief Handle Bytecode Parsing Events
309class BytecodeHandler {
310
311/// @name Constructors And Operators
312/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +0000313public:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000314 /// @brief Default constructor (empty)
315 BytecodeHandler() {}
316 /// @brief Virtual destructor (empty)
317 virtual ~BytecodeHandler() {}
Reid Spencerdac69c82004-06-07 17:53:43 +0000318
319private:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000320 BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT
321 void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT
Reid Spencerdac69c82004-06-07 17:53:43 +0000322
Reid Spencer29dba0c2004-06-08 05:51:18 +0000323/// @}
324/// @name Handler Methods
325/// @{
326public:
Reid Spencerdac69c82004-06-07 17:53:43 +0000327
Reid Spencer29dba0c2004-06-08 05:51:18 +0000328 /// This method is called whenever the parser detects an error in the
329 /// bytecode formatting. Returning true will cause the parser to keep
330 /// going, however this is inadvisable in most cases. Returning false will
331 /// cause the parser to throw the message as a std::string.
332 /// @brief Handle parsing errors.
333 virtual bool handleError(const std::string& str );
334
335 /// This method is called at the beginning of a parse before anything is
336 /// read in order to give the handler a chance to initialize.
337 /// @brief Handle the start of a bytecode parse
338 virtual void handleStart();
339
340 /// This method is called at the end of a parse after everything has been
341 /// read in order to give the handler a chance to terminate.
342 /// @brief Handle the end of a bytecode parse
343 virtual void handleFinish();
344
345 /// This method is called at the start of a module to indicate that a
346 /// module is being parsed.
347 /// @brief Handle the start of a module.
348 virtual void handleModuleBegin(const std::string& id);
349
350 /// This method is called at the end of a module to indicate that the module
351 /// previously being parsed has concluded.
352 /// @brief Handle the end of a module.
353 virtual void handleModuleEnd(const std::string& id);
354
355 /// This method is called once the version information has been parsed. It
356 /// provides the information about the version of the bytecode file being
357 /// read.
358 /// @brief Handle the bytecode prolog
359 virtual void handleVersionInfo(
360 unsigned char RevisionNum, ///< Byte code revision number
361 Module::Endianness Endianness, ///< Endianness indicator
362 Module::PointerSize PointerSize ///< PointerSize indicator
363 );
364
365 /// This method is called at the start of a module globals block which
366 /// contains the global variables and the function placeholders
367 virtual void handleModuleGlobalsBegin();
368
369 /// This method is called when a non-initialized global variable is
370 /// recognized. Its type, constness, and linkage type are provided.
371 /// @brief Handle a non-initialized global variable
372 virtual void handleGlobalVariable(
373 const Type* ElemType, ///< The type of the global variable
374 bool isConstant, ///< Whether the GV is constant or not
375 GlobalValue::LinkageTypes ///< The linkage type of the GV
376 );
377
378 /// This method is called when an initialized global variable is recognized.
379 /// Its type constness, linkage type, and the slot number of the initializer
380 /// are provided.
381 /// @brief Handle an intialized global variable.
382 virtual void handleInitializedGV(
383 const Type* ElemType, ///< The type of the global variable
384 bool isConstant, ///< Whether the GV is constant or not
385 GlobalValue::LinkageTypes,///< The linkage type of the GV
386 unsigned initSlot ///< Slot number of GV's initializer
387 );
388
389 /// This method is called when a new type is recognized. The type is
390 /// converted from the bytecode and passed to this method.
391 /// @brief Handle a type
392 virtual void handleType( const Type* Ty );
393
394 /// This method is called when the function prototype for a function is
395 /// encountered in the module globals block.
396 virtual void handleFunctionDeclaration(
397 const Type* FuncType ///< The type of the function
398 );
399
400 /// This method is called at the end of the module globals block.
401 /// @brief Handle end of module globals block.
402 virtual void handleModuleGlobalsEnd();
403
404 /// This method is called at the beginning of a compaction table.
405 /// @brief Handle start of compaction table.
406 virtual void handleCompactionTableBegin();
407
408 /// @brief Handle start of a compaction table plane
409 virtual void handleCompactionTablePlane(
410 unsigned Ty,
411 unsigned NumEntries
412 );
413
414
415 /// @brief Handle a type entry in the compaction table
416 virtual void handleCompactionTableType(
417 unsigned i,
418 unsigned TypSlot,
419 const Type*
420 );
421
422 /// @brief Handle a value entry in the compaction table
423 virtual void handleCompactionTableValue(
424 unsigned i,
425 unsigned ValSlot,
426 const Type*
427 );
428
429 /// @brief Handle end of a compaction table
430 virtual void handleCompactionTableEnd();
431
432 /// @brief Handle start of a symbol table
433 virtual void handleSymbolTableBegin();
434
435 /// @brief Handle start of a symbol table plane
436 virtual void handleSymbolTablePlane(
437 unsigned Ty,
438 unsigned NumEntries,
439 const Type* Ty
440 );
441
442 /// @brief Handle a named type in the symbol table
443 virtual void handleSymbolTableType(
444 unsigned i,
445 unsigned slot,
446 const std::string& name
447 );
448
449 /// @brief Handle a named value in the symbol table
450 virtual void handleSymbolTableValue(
451 unsigned i,
452 unsigned slot,
453 const std::string& name
454 );
455
456 /// @brief Handle the end of a symbol table
457 virtual void handleSymbolTableEnd();
458
459 /// @brief Handle the beginning of a function body
460 virtual void handleFunctionBegin(
461 const Type* FType,
462 GlobalValue::LinkageTypes linkage
463 );
464
465 /// @brief Handle the end of a function body
466 virtual void handleFunctionEnd(
467 const Type* FType
468 );
469
470 /// @brief Handle the beginning of a basic block
471 virtual void handleBasicBlockBegin(
472 unsigned blocknum
473 );
474
475 /// This method is called for each instruction that is parsed.
476 /// @returns true if the instruction is a block terminating instruction
477 /// @brief Handle an instruction
478 virtual bool handleInstruction(
479 unsigned Opcode,
480 const Type* iType,
Reid Spencer00c28a72004-06-10 08:09:13 +0000481 std::vector<unsigned>& Operands,
482 unsigned Length
Reid Spencer29dba0c2004-06-08 05:51:18 +0000483 );
484
485 /// @brief Handle the end of a basic block
486 virtual void handleBasicBlockEnd(unsigned blocknum);
487
488 /// @brief Handle start of global constants block.
489 virtual void handleGlobalConstantsBegin();
490
491 /// @brief Handle a constant expression
492 virtual void handleConstantExpression(
493 unsigned Opcode,
494 const Type* Typ,
495 std::vector<std::pair<const Type*,unsigned> > ArgVec
496 );
497
498 /// @brief Handle a constant array
499 virtual void handleConstantArray(
500 const ArrayType* AT,
501 std::vector<unsigned>& ElementSlots
502 );
503
504 /// @brief Handle a constant structure
505 virtual void handleConstantStruct(
506 const StructType* ST,
507 std::vector<unsigned>& ElementSlots
508 );
509
510 /// @brief Handle a constant pointer
511 virtual void handleConstantPointer(
512 const PointerType* PT,
513 unsigned Slot
514 );
515
516 /// @brief Handle a constant strings (array special case)
517 virtual void handleConstantString(
518 const ConstantArray* CA
519 );
520
521 /// @brief Handle a primitive constant value
522 virtual void handleConstantValue( Constant * c );
523
524 /// @brief Handle the end of the global constants
525 virtual void handleGlobalConstantsEnd();
526
Reid Spencer00c28a72004-06-10 08:09:13 +0000527 /// @brief Handle an alignment event
528 virtual void handleAlignment(unsigned numBytes);
529
530 virtual void handleBlock(
531 unsigned BType, ///< The type of block
532 const unsigned char* StartPtr, ///< The start of the block
533 unsigned Size ///< The size of the block
534 );
535 virtual void handleVBR32(unsigned Size );
536 virtual void handleVBR64(unsigned Size );
Reid Spencer29dba0c2004-06-08 05:51:18 +0000537/// @}
Reid Spencerdac69c82004-06-07 17:53:43 +0000538
539};
540
Reid Spencerdac69c82004-06-07 17:53:43 +0000541} // End llvm namespace
542
Reid Spencerdac69c82004-06-07 17:53:43 +0000543// vim: sw=2
Reid Spencer29dba0c2004-06-08 05:51:18 +0000544#endif