| //===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file was developed by Reid Spencer and is distributed under the |
| // University of Illinois Open Source License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This header file defines the interface to the Bytecode Parser and the |
| // Bytecode Handler interface that it calls. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef BYTECODE_PARSER_H |
| #define BYTECODE_PARSER_H |
| |
| #include "llvm/Constants.h" |
| #include "llvm/DerivedTypes.h" |
| #include "llvm/GlobalValue.h" |
| #include "llvm/Module.h" |
| #include <algorithm> |
| #include <cassert> |
| #include <map> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| namespace llvm { |
| |
| class BytecodeHandler; ///< Forward declare the handler interface |
| |
| /// This class defines the interface for parsing a buffer of bytecode. The |
| /// parser itself takes no action except to call the various functions of |
| /// the handler interface. The parser's sole responsibility is the correct |
| /// interpretation of the bytecode buffer. The handler is responsible for |
| /// instantiating and keeping track of all values. As a convenience, the parser |
| /// is responsible for materializing types and will pass them through the |
| /// handler interface as necessary. |
| /// @see BytecodeHandler |
| /// @brief Abstract Bytecode Parser interface |
| class AbstractBytecodeParser { |
| |
| /// @name Constructors |
| /// @{ |
| public: |
|   /// Stores the handler and the three reporting flags. The buffer cursors |
|   /// start out null and the revision number and revision-dependent flags |
|   /// start out zero/false, so no scalar member is left uninitialized. |
|   /// @param h The handler that receives the parsing events |
|   /// @param repAlignment Initial value of the alignment reporting flag |
|   /// @param repBlocks Initial value of the block reporting flag |
|   /// @param repVBR Initial value of the VBR reporting flag |
|   /// @brief Construct a parser bound to a handler |
|   AbstractBytecodeParser( |
|     BytecodeHandler* h, |
|     bool repAlignment = false, |
|     bool repBlocks = false, |
|     bool repVBR = false |
|   ) : MemStart(0), MemEnd(0), BlockStart(0), BlockEnd(0), At(0), |
|       reportAlignment(repAlignment), reportBlocks(repBlocks), |
|       reportVBR(repVBR), RevisionNum(0), |
|       hasInconsistentModuleGlobalInfo(false), |
|       hasExplicitPrimitiveZeros(false), |
|       hasRestrictedGEPTypes(false), |
|       handler(h) { |
|   } |
| |
|   /// @brief Destructor (empty, non-virtual) |
|   ~AbstractBytecodeParser() { } |
| |
| /// @} |
| /// @name Types |
| /// @{ |
| public: |
|   /// @brief A convenience type for the buffer pointer |
|   typedef const unsigned char* BufPtr; |
| |
|   /// @brief The type used for vector of potentially abstract types |
|   typedef std::vector<PATypeHolder> TypeListTy; |
| |
| /// @} |
| /// @name Methods |
| /// @{ |
| public: |
| |
|   /// @brief Main interface to parsing a bytecode buffer. |
|   void ParseBytecode(const unsigned char *Buf, unsigned Length, |
|                      const std::string &ModuleID); |
| |
|   /// The ParseBytecode method lazily parses functions. Use this |
|   /// method to cause the parser to actually parse all the function bodies |
|   /// in the bytecode buffer. |
|   /// @see ParseBytecode |
|   /// @brief Parse all function bodies |
|   void ParseAllFunctionBodies(); |
| |
|   /// The ParseBytecode method lazily parses functions. Use this |
|   /// method to cause the parser to parse the next function of a given |
|   /// type. Note that this will remove the function from what is to be |
|   /// included by ParseAllFunctionBodies. |
|   /// @see ParseAllFunctionBodies |
|   /// @see ParseBytecode |
|   /// @brief Parse the next function of specific type |
|   void ParseNextFunction(Type* FType); |
| |
| /// @} |
| /// @name Parsing Units For Subclasses |
| /// @{ |
| protected: |
|   /// @brief Parse whole module scope |
|   void ParseModule(); |
| |
|   /// @brief Parse the version information block |
|   void ParseVersionInfo(); |
| |
|   /// @brief Parse the ModuleGlobalInfo block |
|   void ParseModuleGlobalInfo(); |
| |
|   /// @brief Parse a symbol table |
|   void ParseSymbolTable(); |
| |
|   /// This function parses LLVM functions lazily. It obtains the type of the |
|   /// function and records where the body of the function is in the bytecode |
|   /// buffer. The caller can then use the ParseNextFunction and |
|   /// ParseAllFunctionBodies to get handler events for the functions. |
|   /// @brief Parse functions lazily. |
|   void ParseFunctionLazily(); |
| |
|   /// @brief Parse a function body |
|   void ParseFunctionBody(const Type* FType); |
| |
|   /// @brief Parse a compaction table |
|   void ParseCompactionTable(); |
| |
|   /// @brief Parse global types |
|   void ParseGlobalTypes(); |
| |
|   /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) |
|   void ParseBasicBlock(unsigned BlockNo); |
| |
|   /// @brief Parse an instruction list (for post LLVM 1.0 instruction lists |
|   /// with blocks differentiated by terminating instructions). |
|   unsigned ParseInstructionList(); |
| |
|   /// @brief Parse an instruction. |
|   bool ParseInstruction(std::vector<unsigned>& Args); |
| |
|   /// @brief Parse a constant pool |
|   void ParseConstantPool(TypeListTy& List); |
| |
|   /// @brief Parse a constant value |
|   void ParseConstantValue(unsigned TypeID); |
| |
|   /// @brief Parse a block of types. |
|   void ParseTypeConstants(TypeListTy &Tab, unsigned NumEntries); |
| |
|   /// @brief Parse a single type. |
|   const Type *ParseTypeConstant(); |
| |
|   /// @brief Parse a string constants block |
|   void ParseStringConstants(unsigned NumEntries); |
| |
| /// @} |
| /// @name Data |
| /// @{ |
| private: |
|   BufPtr MemStart;     ///< Start of the memory buffer |
|   BufPtr MemEnd;       ///< End of the memory buffer |
|   BufPtr BlockStart;   ///< Start of current block being parsed |
|   BufPtr BlockEnd;     ///< End of current block being parsed |
|   BufPtr At;           ///< Where we're currently parsing at |
| |
|   bool reportAlignment; ///< Parser should report alignment? |
|   bool reportBlocks;    ///< Parser should report blocks? |
|   bool reportVBR;       ///< Report VBR compression events |
| |
|   // Information about the module, extracted from the bytecode revision number. |
|   unsigned char RevisionNum;        // The rev # itself |
| |
|   // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0) |
| |
|   // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo |
|   // block.  This was fixed to be like all other blocks in 1.2 |
|   bool hasInconsistentModuleGlobalInfo; |
| |
|   // Revision #0 also explicitly encoded zero values for primitive types like |
|   // int/sbyte/etc. |
|   bool hasExplicitPrimitiveZeros; |
| |
|   // Flags to control features specific to LLVM 1.2 and before (revision #1) |
| |
|   // LLVM 1.2 and earlier required that getelementptr structure indices were |
|   // ubyte constants and that sequential type indices were longs. |
|   bool hasRestrictedGEPTypes; |
| |
|   /// CompactionTypeTable - If a compaction table is active in the current |
|   /// function, this is the mapping that it contains. |
|   std::vector<Type*> CompactionTypeTable; |
| |
|   // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and |
|   // forward references to constants.  Such values may be referenced before they |
|   // are defined, and if so, the temporary object that they represent is held |
|   // here. |
|   // |
|   typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType; |
|   ConstantRefsType ConstantFwdRefs; |
| |
|   // ModuleTypes, FunctionTypes - Type lists used to deal with forward |
|   // references to types.  NOTE(review): the original comment called this |
|   // "TypesLoaded" and described it as mirroring the Values[TypeTyID] plane; |
|   // confirm that description still applies to both lists. |
|   // |
|   TypeListTy ModuleTypes; |
|   TypeListTy FunctionTypes; |
| |
|   // When the ModuleGlobalInfo section is read, we create a FunctionType object |
|   // for each function in the module.  When the function is loaded, this type is |
|   // used to instantiate the actual function object. |
|   std::vector<const Type*> FunctionSignatureList; |
| |
|   // Constant values are read in after global variables.  Because of this, we |
|   // must defer setting the initializers on global variables until after module |
|   // level constants have been read.  In the mean time, this list keeps track of |
|   // what we must do. |
|   // |
|   std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; |
| |
| /// @} |
| /// @name Implementation Details |
| /// @{ |
| private: |
|   /// This stores the parser's handler. It makes virtual function calls through |
|   /// the BytecodeHandler to notify the handler of parsing events. What the |
|   /// handler does with the events is completely orthogonal to the business of |
|   /// parsing the bytecode. |
|   /// @brief The handler of bytecode parsing events. |
|   BytecodeHandler* handler; |
| |
|   /// For lazy reading-in of functions, we need to save away the begin and end |
|   /// pointers of each function body in the buffer. |
|   struct LazyFunctionInfo { |
|     const unsigned char *Buf, *EndBuf; |
|     LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) |
|       : Buf(B), EndBuf(EB) {} |
|   }; |
| |
|   /// Lazily-loaded function bodies, keyed by the function's type. |
|   typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap; |
|   LazyFunctionMap LazyFunctionLoadMap; |
| |
| private: |
| |
|   /// Is there more to parse in the current block? |
|   inline bool moreInBlock(); |
| |
|   /// Have we read past the end of the block |
|   inline void checkPastBlockEnd(const char * block_name); |
| |
|   /// Align to 32 bits |
|   inline void align32(); |
| |
|   /// Reader interface |
|   inline unsigned read_uint(); |
|   inline unsigned read_vbr_uint(); |
|   inline uint64_t read_vbr_uint64(); |
|   inline int64_t read_vbr_int64(); |
|   inline std::string read_str(); |
|   inline void read_data(void *Ptr, void *End); |
| |
|   /// Read a block header |
|   inline void readBlock(unsigned &Type, unsigned &Size); |
| |
|   /// getType - Map a type ID to its Type. Declared here without the class |
|   /// qualifier: a qualified name is not permitted on an in-class member |
|   /// declaration. The definition lives out of line. |
|   const Type *getType(unsigned ID); |
| |
|   /// getGlobalTableType - This is just like getType, but when a compaction |
|   /// table is in use, it is ignored.  Also, no forward references or other |
|   /// fancy features are supported. |
|   /// |
|   /// Slots below Type::FirstDerivedTyID are resolved through |
|   /// Type::getPrimitiveType; any other slot is rebased by FirstDerivedTyID and |
|   /// used as an index into ModuleTypes. Throws a std::string if that index is |
|   /// out of range. |
|   const Type *getGlobalTableType(unsigned Slot) { |
|     if (Slot < Type::FirstDerivedTyID) { |
|       const Type *Ty = |
|         Type::getPrimitiveType(static_cast<Type::PrimitiveID>(Slot)); |
|       assert(Ty && "Not a primitive type ID?"); |
|       return Ty; |
|     } |
|     Slot -= Type::FirstDerivedTyID; |
|     if (Slot >= ModuleTypes.size()) |
|       throw std::string("Illegal compaction table type reference!"); |
|     return ModuleTypes[Slot]; |
|   } |
| |
|   /// getGlobalTableTypeSlot - Inverse of getGlobalTableType. Primitive types |
|   /// map to their primitive ID; any other type is searched for linearly in |
|   /// ModuleTypes and mapped to FirstDerivedTyID plus its position. Throws a |
|   /// std::string if the type is not present in ModuleTypes. |
|   unsigned getGlobalTableTypeSlot(const Type *Ty) { |
|     if (Ty->isPrimitiveType()) |
|       return Ty->getPrimitiveID(); |
|     TypeListTy::iterator I = std::find(ModuleTypes.begin(), |
|                                        ModuleTypes.end(), Ty); |
|     if (I == ModuleTypes.end()) |
|       throw std::string("Didn't find type in ModuleTypes."); |
|     return Type::FirstDerivedTyID + |
|            static_cast<unsigned>(I - ModuleTypes.begin()); |
|   } |
| |
|   AbstractBytecodeParser(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT |
|   void operator=(const AbstractBytecodeParser &);  // DO NOT IMPLEMENT |
| |
| /// @} |
| }; |
| |
| /// This class provides the interface for handling bytecode events during |
| /// parsing. The methods on this interface are invoked by the |
| /// AbstractBytecodeParser as it discovers the content of a bytecode stream. |
| /// This class provides a clear separation of concerns between recognizing |
| /// the semantic units of a bytecode file and deciding what to do with them. |
| /// The AbstractBytecodeParser recognizes the content of the bytecode file and |
| /// calls the BytecodeHandler methods to determine what should be done. This |
| /// arrangement allows Bytecode files to be read and handled for a number of |
| /// purposes simply by creating a subclass of BytecodeHandler. None of the |
| /// parsing details need to be understood, only the meaning of the calls |
| /// made on this interface. |
| /// |
| /// Another paradigm that uses this design pattern is the XML SAX Parser. The |
| /// ContentHandler for SAX plays the same role as the BytecodeHandler here. |
| /// @see AbstractBytecodeParser |
| /// @brief Handle Bytecode Parsing Events |
| class BytecodeHandler { |
| |
| /// @name Constructors And Operators |
| /// @{ |
| public: |
|   /// @brief Default constructor (empty) |
|   BytecodeHandler() {} |
|   /// @brief Virtual destructor (empty) |
|   virtual ~BytecodeHandler() {} |
| |
| private: |
|   BytecodeHandler(const BytecodeHandler &);  // DO NOT IMPLEMENT |
|   void operator=(const BytecodeHandler &);  // DO NOT IMPLEMENT |
| |
| /// @} |
| /// @name Handler Methods |
| /// @{ |
| public: |
| |
|   /// This method is called whenever the parser detects an error in the |
|   /// bytecode formatting. Returning true will cause the parser to keep |
|   /// going, however this is inadvisable in most cases. Returning false will |
|   /// cause the parser to throw the message as a std::string. |
|   /// @brief Handle parsing errors. |
|   virtual bool handleError(const std::string& str); |
| |
|   /// This method is called at the beginning of a parse before anything is |
|   /// read in order to give the handler a chance to initialize. |
|   /// @brief Handle the start of a bytecode parse |
|   virtual void handleStart(); |
| |
|   /// This method is called at the end of a parse after everything has been |
|   /// read in order to give the handler a chance to terminate. |
|   /// @brief Handle the end of a bytecode parse |
|   virtual void handleFinish(); |
| |
|   /// This method is called at the start of a module to indicate that a |
|   /// module is being parsed. |
|   /// @brief Handle the start of a module. |
|   virtual void handleModuleBegin(const std::string& id); |
| |
|   /// This method is called at the end of a module to indicate that the module |
|   /// previously being parsed has concluded. |
|   /// @brief Handle the end of a module. |
|   virtual void handleModuleEnd(const std::string& id); |
| |
|   /// This method is called once the version information has been parsed. It |
|   /// provides the information about the version of the bytecode file being |
|   /// read. |
|   /// @brief Handle the bytecode prolog |
|   virtual void handleVersionInfo( |
|     unsigned char RevisionNum,        ///< Byte code revision number |
|     Module::Endianness Endianness,    ///< Endianness indicator |
|     Module::PointerSize PointerSize   ///< PointerSize indicator |
|   ); |
| |
|   /// This method is called at the start of a module globals block which |
|   /// contains the global variables and the function placeholders |
|   /// @brief Handle start of module globals block. |
|   virtual void handleModuleGlobalsBegin(); |
| |
|   /// This method is called when a non-initialized global variable is |
|   /// recognized. Its type, constness, and linkage type are provided. |
|   /// @brief Handle a non-initialized global variable |
|   virtual void handleGlobalVariable( |
|     const Type* ElemType,      ///< The type of the global variable |
|     bool isConstant,           ///< Whether the GV is constant or not |
|     GlobalValue::LinkageTypes  ///< The linkage type of the GV |
|   ); |
| |
|   /// This method is called when an initialized global variable is recognized. |
|   /// Its type, constness, linkage type, and the slot number of the initializer |
|   /// are provided. |
|   /// @brief Handle an initialized global variable. |
|   virtual void handleInitializedGV( |
|     const Type* ElemType,      ///< The type of the global variable |
|     bool isConstant,           ///< Whether the GV is constant or not |
|     GlobalValue::LinkageTypes, ///< The linkage type of the GV |
|     unsigned initSlot          ///< Slot number of GV's initializer |
|   ); |
| |
|   /// This method is called when a new type is recognized. The type is |
|   /// converted from the bytecode and passed to this method. |
|   /// @brief Handle a type |
|   virtual void handleType(const Type* Ty); |
| |
|   /// This method is called when the function prototype for a function is |
|   /// encountered in the module globals block. |
|   /// @brief Handle a function declaration |
|   virtual void handleFunctionDeclaration( |
|     const Type* FuncType       ///< The type of the function |
|   ); |
| |
|   /// This method is called at the end of the module globals block. |
|   /// @brief Handle end of module globals block. |
|   virtual void handleModuleGlobalsEnd(); |
| |
|   /// This method is called at the beginning of a compaction table. |
|   /// @brief Handle start of compaction table. |
|   virtual void handleCompactionTableBegin(); |
| |
|   /// @brief Handle start of a compaction table plane |
|   virtual void handleCompactionTablePlane( |
|     unsigned Ty, |
|     unsigned NumEntries |
|   ); |
| |
|   /// @brief Handle a type entry in the compaction table |
|   virtual void handleCompactionTableType( |
|     unsigned i, |
|     unsigned TypSlot, |
|     const Type* |
|   ); |
| |
|   /// @brief Handle a value entry in the compaction table |
|   virtual void handleCompactionTableValue( |
|     unsigned i, |
|     unsigned ValSlot, |
|     const Type* |
|   ); |
| |
|   /// @brief Handle end of a compaction table |
|   virtual void handleCompactionTableEnd(); |
| |
|   /// @brief Handle start of a symbol table |
|   virtual void handleSymbolTableBegin(); |
| |
|   /// The Type pointer parameter is named Typ because two parameters of one |
|   /// declaration may not share a name. |
|   /// @brief Handle start of a symbol table plane |
|   virtual void handleSymbolTablePlane( |
|     unsigned Ty, |
|     unsigned NumEntries, |
|     const Type* Typ |
|   ); |
| |
|   /// @brief Handle a named type in the symbol table |
|   virtual void handleSymbolTableType( |
|     unsigned i, |
|     unsigned slot, |
|     const std::string& name |
|   ); |
| |
|   /// @brief Handle a named value in the symbol table |
|   virtual void handleSymbolTableValue( |
|     unsigned i, |
|     unsigned slot, |
|     const std::string& name |
|   ); |
| |
|   /// @brief Handle the end of a symbol table |
|   virtual void handleSymbolTableEnd(); |
| |
|   /// @brief Handle the beginning of a function body |
|   virtual void handleFunctionBegin( |
|     const Type* FType, |
|     GlobalValue::LinkageTypes linkage |
|   ); |
| |
|   /// @brief Handle the end of a function body |
|   virtual void handleFunctionEnd( |
|     const Type* FType |
|   ); |
| |
|   /// @brief Handle the beginning of a basic block |
|   virtual void handleBasicBlockBegin( |
|     unsigned blocknum |
|   ); |
| |
|   /// This method is called for each instruction that is parsed. |
|   /// @returns true if the instruction is a block terminating instruction |
|   /// @brief Handle an instruction |
|   virtual bool handleInstruction( |
|     unsigned Opcode, |
|     const Type* iType, |
|     std::vector<unsigned>& Operands, |
|     unsigned Length |
|   ); |
| |
|   /// @brief Handle the end of a basic block |
|   virtual void handleBasicBlockEnd(unsigned blocknum); |
| |
|   /// @brief Handle start of global constants block. |
|   virtual void handleGlobalConstantsBegin(); |
| |
|   /// NOTE(review): ArgVec is taken by value, so every call copies the vector. |
|   /// Changing that would alter the virtual signature seen by overriders, so it |
|   /// is left as is here. |
|   /// @brief Handle a constant expression |
|   virtual void handleConstantExpression( |
|     unsigned Opcode, |
|     const Type* Typ, |
|     std::vector<std::pair<const Type*,unsigned> > ArgVec |
|   ); |
| |
|   /// @brief Handle a constant array |
|   virtual void handleConstantArray( |
|     const ArrayType* AT, |
|     std::vector<unsigned>& ElementSlots |
|   ); |
| |
|   /// @brief Handle a constant structure |
|   virtual void handleConstantStruct( |
|     const StructType* ST, |
|     std::vector<unsigned>& ElementSlots |
|   ); |
| |
|   /// @brief Handle a constant pointer |
|   virtual void handleConstantPointer( |
|     const PointerType* PT, |
|     unsigned Slot |
|   ); |
| |
|   /// @brief Handle a constant string (array special case) |
|   virtual void handleConstantString( |
|     const ConstantArray* CA |
|   ); |
| |
|   /// @brief Handle a primitive constant value |
|   virtual void handleConstantValue(Constant * c); |
| |
|   /// @brief Handle the end of the global constants |
|   virtual void handleGlobalConstantsEnd(); |
| |
|   /// @brief Handle an alignment event |
|   virtual void handleAlignment(unsigned numBytes); |
| |
|   /// @brief Handle a block event |
|   virtual void handleBlock( |
|     unsigned BType,                ///< The type of block |
|     const unsigned char* StartPtr, ///< The start of the block |
|     unsigned Size                  ///< The size of the block |
|   ); |
| |
|   /// @brief Handle a VBR32 event. |
|   /// NOTE(review): the meaning of Size is not documented in this header; |
|   /// confirm against the parser implementation. |
|   virtual void handleVBR32(unsigned Size); |
| |
|   /// @brief Handle a VBR64 event. |
|   /// NOTE(review): the meaning of Size is not documented in this header; |
|   /// confirm against the parser implementation. |
|   virtual void handleVBR64(unsigned Size); |
| /// @} |
| |
| }; |
| |
| } // End llvm namespace |
| |
| // vim: sw=2 |
| #endif |