blob: 38a14717d5e87b3a51969595ed6bf4f9be0f9b3d [file] [log] [blame]
//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This header file defines the interface to the Bytecode Parser and the
//  Bytecode Handler interface that it calls.
//
//===----------------------------------------------------------------------===//
14
15#ifndef BYTECODE_PARSER_H
16#define BYTECODE_PARSER_H
17
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>
25
26namespace llvm {
27
Reid Spencer29dba0c2004-06-08 05:51:18 +000028class BytecodeHandler; ///< Forward declare the handler interface
Reid Spencerdac69c82004-06-07 17:53:43 +000029
Reid Spencer29dba0c2004-06-08 05:51:18 +000030/// This class defines the interface for parsing a buffer of bytecode. The
31/// parser itself takes no action except to call the various functions of
32/// the handler interface. The parser's sole responsibility is the correct
33/// interpretation of the bytecode buffer. The handler is responsible for
34/// instantiating and keeping track of all values. As a convenience, the parser
35/// is responsible for materializing types and will pass them through the
36/// handler interface as necessary.
37/// @see BytecodeHandler
38/// @brief Abstract Bytecode Parser interface
Reid Spencerdac69c82004-06-07 17:53:43 +000039class AbstractBytecodeParser {
Reid Spencer29dba0c2004-06-08 05:51:18 +000040
41/// @name Constructors
42/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +000043public:
44 AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; }
45 ~AbstractBytecodeParser() { }
46
Reid Spencer29dba0c2004-06-08 05:51:18 +000047/// @}
48/// @name Types
49/// @{
50public:
51 /// @brief A convenience type for the buffer pointer
52 typedef const unsigned char* BufPtr;
53
54 /// @brief The type used for vector of potentially abstract types
55 typedef std::vector<PATypeHolder> TypeListTy;
56
57 /// @brief
58
59/// @}
60/// @name Methods
61/// @{
62public:
63
64 /// @brief Main interface to parsing a bytecode buffer.
Reid Spencerdac69c82004-06-07 17:53:43 +000065 void ParseBytecode(const unsigned char *Buf, unsigned Length,
66 const std::string &ModuleID);
67
Reid Spencer29dba0c2004-06-08 05:51:18 +000068 /// The ParseBytecode method lazily parses functions. Use this
69 /// method to cause the parser to actually parse all the function bodies
70 /// in the bytecode buffer.
71 /// @see ParseBytecode
72 /// @brief Parse all function bodies
73 void ParseAllFunctionBodies ();
Reid Spencerdac69c82004-06-07 17:53:43 +000074
Reid Spencer29dba0c2004-06-08 05:51:18 +000075 /// The Parsebytecode method lazily parses functions. Use this
76 /// method to casue the parser to parse the next function of a given
77 /// types. Note that this will remove the function from what is to be
78 /// included by ParseAllFunctionBodies.
79 /// @see ParseAllFunctionBodies
80 /// @see ParseBytecode
81 /// @brief Parse the next function of specific type
82 void ParseNextFunction (Type* FType) ;
83
84/// @}
85/// @name Parsing Units For Subclasses
86/// @{
87protected:
88 /// @brief Parse whole module scope
89 void ParseModule (BufPtr &Buf, BufPtr End);
90
91 /// @brief Parse the version information block
92 void ParseVersionInfo (BufPtr &Buf, BufPtr End);
93
94 /// @brief Parse the ModuleGlobalInfo block
95 void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End);
96
97 /// @brief Parse a symbol table
98 void ParseSymbolTable (BufPtr &Buf, BufPtr End);
99
100 /// This function parses LLVM functions lazily. It obtains the type of the
101 /// function and records where the body of the function is in the bytecode
102 /// buffer. The caller can then use the ParseNextFunction and
103 /// ParseAllFunctionBodies to get handler events for the functions.
104 /// @brief Parse functions lazily.
105 void ParseFunctionLazily (BufPtr &Buf, BufPtr End);
106
107 /// @brief Parse a function body
108 void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf);
109
110 /// @brief Parse a compaction table
111 void ParseCompactionTable (BufPtr &Buf, BufPtr End);
112
113 /// @brief Parse global types
114 void ParseGlobalTypes (BufPtr &Buf, BufPtr End);
115
116 /// @brief Parse a basic block (for LLVM 1.0 basic block blocks)
117 void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo);
118
119 /// @brief parse an instruction list (for post LLVM 1.0 instruction lists
120 /// with blocks differentiated by terminating instructions.
121 unsigned ParseInstructionList(BufPtr &Buf, BufPtr End);
122
123 /// @brief Parse an instruction.
124 bool ParseInstruction (BufPtr &Buf, BufPtr End,
125 std::vector<unsigned>& Args);
126
127 /// @brief Parse a constant pool
128 void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List);
129
130 /// @brief Parse a constant value
131 void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID);
132
133 /// @brief Parse a block of types.
134 void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab,
135 unsigned NumEntries);
136
137 /// @brief Parse a single type.
138 const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End);
139
140 /// @brief Parse a string constants block
141 void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries);
142
143/// @}
144/// @name Data
145/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +0000146private:
147 // Information about the module, extracted from the bytecode revision number.
148 unsigned char RevisionNum; // The rev # itself
149
150 // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
151
152 // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
153 // block. This was fixed to be like all other blocks in 1.2
154 bool hasInconsistentModuleGlobalInfo;
155
156 // Revision #0 also explicitly encoded zero values for primitive types like
157 // int/sbyte/etc.
158 bool hasExplicitPrimitiveZeros;
159
160 // Flags to control features specific the LLVM 1.2 and before (revision #1)
161
162 // LLVM 1.2 and earlier required that getelementptr structure indices were
163 // ubyte constants and that sequential type indices were longs.
164 bool hasRestrictedGEPTypes;
165
166
167 /// CompactionTable - If a compaction table is active in the current function,
168 /// this is the mapping that it contains.
169 std::vector<Type*> CompactionTypeTable;
170
171 // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
172 // forward references to constants. Such values may be referenced before they
173 // are defined, and if so, the temporary object that they represent is held
174 // here.
175 //
176 typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
177 ConstantRefsType ConstantFwdRefs;
178
179 // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
180 // to deal with forward references to types.
181 //
Reid Spencerdac69c82004-06-07 17:53:43 +0000182 TypeListTy ModuleTypes;
183 TypeListTy FunctionTypes;
184
185 // When the ModuleGlobalInfo section is read, we create a FunctionType object
186 // for each function in the module. When the function is loaded, this type is
187 // used to instantiate the actual function object.
Reid Spencer29dba0c2004-06-08 05:51:18 +0000188
Reid Spencerdac69c82004-06-07 17:53:43 +0000189 std::vector<const Type*> FunctionSignatureList;
190
191 // Constant values are read in after global variables. Because of this, we
192 // must defer setting the initializers on global variables until after module
193 // level constants have been read. In the mean time, this list keeps track of
194 // what we must do.
195 //
196 std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
197
Reid Spencer29dba0c2004-06-08 05:51:18 +0000198/// @}
199/// @name Implementation Details
200/// @{
201private:
202 /// This stores the parser's handler. It makes virtual function calls through
203 /// the BytecodeHandler to notify the handler of parsing events. What the
204 /// handler does with the events is completely orthogonal to the business of
205 /// parsing the bytecode.
206 /// @brief The handler of bytecode parsing events.
Reid Spencerdac69c82004-06-07 17:53:43 +0000207 BytecodeHandler* handler;
208
Reid Spencer29dba0c2004-06-08 05:51:18 +0000209 /// For lazy reading-in of functions, we need to save away several pieces of
210 /// information about each function: its begin and end pointer in the buffer
211 /// and its FunctionSlot.
212 struct LazyFunctionInfo {
213 const unsigned char *Buf, *EndBuf;
214 LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0)
215 : Buf(B), EndBuf(EB) {}
216 };
217 typedef std::map<const Type*, LazyFunctionInfo> LazyFunctionMap;
218 LazyFunctionMap LazyFunctionLoadMap;
219
Reid Spencerdac69c82004-06-07 17:53:43 +0000220private:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000221
222 static inline void readBlock(const unsigned char *&Buf,
223 const unsigned char *EndBuf,
224 unsigned &Type, unsigned &Size) ;
225
Reid Spencerdac69c82004-06-07 17:53:43 +0000226 const Type *AbstractBytecodeParser::getType(unsigned ID);
227 /// getGlobalTableType - This is just like getType, but when a compaction
228 /// table is in use, it is ignored. Also, no forward references or other
229 /// fancy features are supported.
230 const Type *getGlobalTableType(unsigned Slot) {
231 if (Slot < Type::FirstDerivedTyID) {
232 const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot);
233 assert(Ty && "Not a primitive type ID?");
234 return Ty;
235 }
236 Slot -= Type::FirstDerivedTyID;
237 if (Slot >= ModuleTypes.size())
238 throw std::string("Illegal compaction table type reference!");
239 return ModuleTypes[Slot];
240 }
241
242 unsigned getGlobalTableTypeSlot(const Type *Ty) {
243 if (Ty->isPrimitiveType())
244 return Ty->getPrimitiveID();
245 TypeListTy::iterator I = find(ModuleTypes.begin(),
246 ModuleTypes.end(), Ty);
247 if (I == ModuleTypes.end())
248 throw std::string("Didn't find type in ModuleTypes.");
249 return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]);
250 }
251
Reid Spencer29dba0c2004-06-08 05:51:18 +0000252 AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
253 void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
254
255/// @}
256};
257
258/// This class provides the interface for the handling bytecode events during
259/// parsing. The methods on this interface are invoked by the
260/// AbstractBytecodeParser as it discovers the content of a bytecode stream.
261/// This class provides a a clear separation of concerns between recognizing
262/// the semantic units of a bytecode file and deciding what to do with them.
263/// The AbstractBytecodeParser recognizes the content of the bytecode file and
264/// calls the BytecodeHandler methods to determine what should be done. This
265/// arrangement allows Bytecode files to be read and handled for a number of
266/// purposes simply by creating a subclass of BytecodeHandler. None of the
267/// parsing details need to be understood, only the meaning of the calls
268/// made on this interface.
269///
270/// Another paradigm that uses this design pattern is the XML SAX Parser. The
271/// ContentHandler for SAX plays the same role as the BytecodeHandler here.
272/// @see AbstractbytecodeParser
273/// @brief Handle Bytecode Parsing Events
274class BytecodeHandler {
275
276/// @name Constructors And Operators
277/// @{
Reid Spencerdac69c82004-06-07 17:53:43 +0000278public:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000279 /// @brief Default constructor (empty)
280 BytecodeHandler() {}
281 /// @brief Virtual destructor (empty)
282 virtual ~BytecodeHandler() {}
Reid Spencerdac69c82004-06-07 17:53:43 +0000283
284private:
Reid Spencer29dba0c2004-06-08 05:51:18 +0000285 BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT
286 void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT
Reid Spencerdac69c82004-06-07 17:53:43 +0000287
Reid Spencer29dba0c2004-06-08 05:51:18 +0000288/// @}
289/// @name Handler Methods
290/// @{
291public:
Reid Spencerdac69c82004-06-07 17:53:43 +0000292
Reid Spencer29dba0c2004-06-08 05:51:18 +0000293 /// This method is called whenever the parser detects an error in the
294 /// bytecode formatting. Returning true will cause the parser to keep
295 /// going, however this is inadvisable in most cases. Returning false will
296 /// cause the parser to throw the message as a std::string.
297 /// @brief Handle parsing errors.
298 virtual bool handleError(const std::string& str );
299
300 /// This method is called at the beginning of a parse before anything is
301 /// read in order to give the handler a chance to initialize.
302 /// @brief Handle the start of a bytecode parse
303 virtual void handleStart();
304
305 /// This method is called at the end of a parse after everything has been
306 /// read in order to give the handler a chance to terminate.
307 /// @brief Handle the end of a bytecode parse
308 virtual void handleFinish();
309
310 /// This method is called at the start of a module to indicate that a
311 /// module is being parsed.
312 /// @brief Handle the start of a module.
313 virtual void handleModuleBegin(const std::string& id);
314
315 /// This method is called at the end of a module to indicate that the module
316 /// previously being parsed has concluded.
317 /// @brief Handle the end of a module.
318 virtual void handleModuleEnd(const std::string& id);
319
320 /// This method is called once the version information has been parsed. It
321 /// provides the information about the version of the bytecode file being
322 /// read.
323 /// @brief Handle the bytecode prolog
324 virtual void handleVersionInfo(
325 unsigned char RevisionNum, ///< Byte code revision number
326 Module::Endianness Endianness, ///< Endianness indicator
327 Module::PointerSize PointerSize ///< PointerSize indicator
328 );
329
330 /// This method is called at the start of a module globals block which
331 /// contains the global variables and the function placeholders
332 virtual void handleModuleGlobalsBegin();
333
334 /// This method is called when a non-initialized global variable is
335 /// recognized. Its type, constness, and linkage type are provided.
336 /// @brief Handle a non-initialized global variable
337 virtual void handleGlobalVariable(
338 const Type* ElemType, ///< The type of the global variable
339 bool isConstant, ///< Whether the GV is constant or not
340 GlobalValue::LinkageTypes ///< The linkage type of the GV
341 );
342
343 /// This method is called when an initialized global variable is recognized.
344 /// Its type constness, linkage type, and the slot number of the initializer
345 /// are provided.
346 /// @brief Handle an intialized global variable.
347 virtual void handleInitializedGV(
348 const Type* ElemType, ///< The type of the global variable
349 bool isConstant, ///< Whether the GV is constant or not
350 GlobalValue::LinkageTypes,///< The linkage type of the GV
351 unsigned initSlot ///< Slot number of GV's initializer
352 );
353
354 /// This method is called when a new type is recognized. The type is
355 /// converted from the bytecode and passed to this method.
356 /// @brief Handle a type
357 virtual void handleType( const Type* Ty );
358
359 /// This method is called when the function prototype for a function is
360 /// encountered in the module globals block.
361 virtual void handleFunctionDeclaration(
362 const Type* FuncType ///< The type of the function
363 );
364
365 /// This method is called at the end of the module globals block.
366 /// @brief Handle end of module globals block.
367 virtual void handleModuleGlobalsEnd();
368
369 /// This method is called at the beginning of a compaction table.
370 /// @brief Handle start of compaction table.
371 virtual void handleCompactionTableBegin();
372
373 /// @brief Handle start of a compaction table plane
374 virtual void handleCompactionTablePlane(
375 unsigned Ty,
376 unsigned NumEntries
377 );
378
379
380 /// @brief Handle a type entry in the compaction table
381 virtual void handleCompactionTableType(
382 unsigned i,
383 unsigned TypSlot,
384 const Type*
385 );
386
387 /// @brief Handle a value entry in the compaction table
388 virtual void handleCompactionTableValue(
389 unsigned i,
390 unsigned ValSlot,
391 const Type*
392 );
393
394 /// @brief Handle end of a compaction table
395 virtual void handleCompactionTableEnd();
396
397 /// @brief Handle start of a symbol table
398 virtual void handleSymbolTableBegin();
399
400 /// @brief Handle start of a symbol table plane
401 virtual void handleSymbolTablePlane(
402 unsigned Ty,
403 unsigned NumEntries,
404 const Type* Ty
405 );
406
407 /// @brief Handle a named type in the symbol table
408 virtual void handleSymbolTableType(
409 unsigned i,
410 unsigned slot,
411 const std::string& name
412 );
413
414 /// @brief Handle a named value in the symbol table
415 virtual void handleSymbolTableValue(
416 unsigned i,
417 unsigned slot,
418 const std::string& name
419 );
420
421 /// @brief Handle the end of a symbol table
422 virtual void handleSymbolTableEnd();
423
424 /// @brief Handle the beginning of a function body
425 virtual void handleFunctionBegin(
426 const Type* FType,
427 GlobalValue::LinkageTypes linkage
428 );
429
430 /// @brief Handle the end of a function body
431 virtual void handleFunctionEnd(
432 const Type* FType
433 );
434
435 /// @brief Handle the beginning of a basic block
436 virtual void handleBasicBlockBegin(
437 unsigned blocknum
438 );
439
440 /// This method is called for each instruction that is parsed.
441 /// @returns true if the instruction is a block terminating instruction
442 /// @brief Handle an instruction
443 virtual bool handleInstruction(
444 unsigned Opcode,
445 const Type* iType,
446 std::vector<unsigned>& Operands
447 );
448
449 /// @brief Handle the end of a basic block
450 virtual void handleBasicBlockEnd(unsigned blocknum);
451
452 /// @brief Handle start of global constants block.
453 virtual void handleGlobalConstantsBegin();
454
455 /// @brief Handle a constant expression
456 virtual void handleConstantExpression(
457 unsigned Opcode,
458 const Type* Typ,
459 std::vector<std::pair<const Type*,unsigned> > ArgVec
460 );
461
462 /// @brief Handle a constant array
463 virtual void handleConstantArray(
464 const ArrayType* AT,
465 std::vector<unsigned>& ElementSlots
466 );
467
468 /// @brief Handle a constant structure
469 virtual void handleConstantStruct(
470 const StructType* ST,
471 std::vector<unsigned>& ElementSlots
472 );
473
474 /// @brief Handle a constant pointer
475 virtual void handleConstantPointer(
476 const PointerType* PT,
477 unsigned Slot
478 );
479
480 /// @brief Handle a constant strings (array special case)
481 virtual void handleConstantString(
482 const ConstantArray* CA
483 );
484
485 /// @brief Handle a primitive constant value
486 virtual void handleConstantValue( Constant * c );
487
488 /// @brief Handle the end of the global constants
489 virtual void handleGlobalConstantsEnd();
490
491/// @}
Reid Spencerdac69c82004-06-07 17:53:43 +0000492
493};
494
Reid Spencerdac69c82004-06-07 17:53:43 +0000495} // End llvm namespace
496
Reid Spencerdac69c82004-06-07 17:53:43 +0000497// vim: sw=2
Reid Spencer29dba0c2004-06-08 05:51:18 +0000498#endif