blob: a5827589b48d41bef54609f08540830c2f0d4450 [file] [log] [blame]
Reid Spencerf4ec6382004-06-29 23:31:01 +00001//===-- Reader.h - Interface To Bytecode Reading ----------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Reid Spencer and is distributed under the
6// University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header file defines the interface to the Bytecode Reader which is
11// responsible for correctly interpreting bytecode files (backwards compatible)
12// and materializing a module from the bytecode read.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef BYTECODE_PARSER_H
17#define BYTECODE_PARSER_H
18
19#include "llvm/Constants.h"
20#include "llvm/DerivedTypes.h"
21#include "llvm/GlobalValue.h"
22#include "llvm/Function.h"
23#include "llvm/ModuleProvider.h"
24#include <utility>
25#include <map>
26
27namespace llvm {
28
29class BytecodeHandler; ///< Forward declare the handler interface
30
31/// This class defines the interface for parsing a buffer of bytecode. The
32/// parser itself takes no action except to call the various functions of
33/// the handler interface. The parser's sole responsibility is the correct
34/// interpretation of the bytecode buffer. The handler is responsible for
35/// instantiating and keeping track of all values. As a convenience, the parser
36/// is responsible for materializing types and will pass them through the
37/// handler interface as necessary.
38/// @see BytecodeHandler
39/// @brief Bytecode Reader interface
40class BytecodeReader : public ModuleProvider {
41
42/// @name Constructors
43/// @{
44public:
45 /// @brief Default constructor. By default, no handler is used.
46 BytecodeReader(
47 BytecodeHandler* h = 0
48 ) {
49 Handler = h;
50 }
51
52 ~BytecodeReader() { freeState(); }
53
54/// @}
55/// @name Types
56/// @{
57public:
58 /// @brief A convenience type for the buffer pointer
59 typedef const unsigned char* BufPtr;
60
61 /// @brief The type used for a vector of potentially abstract types
62 typedef std::vector<PATypeHolder> TypeListTy;
63
64 /// This type provides a vector of Value* via the User class for
65 /// storage of Values that have been constructed when reading the
66 /// bytecode. Because of forward referencing, constant replacement
67 /// can occur so we ensure that our list of Value* is updated
68 /// properly through those transitions. This ensures that the
69 /// correct Value* is in our list when it comes time to associate
70 /// constants with global variables at the end of reading the
71 /// globals section.
72 /// @brief A list of values as a User of those Values.
73 struct ValueList : public User {
74 ValueList() : User(Type::TypeTy, Value::TypeVal) {}
75
76 // vector compatibility methods
77 unsigned size() const { return getNumOperands(); }
78 void push_back(Value *V) { Operands.push_back(Use(V, this)); }
79 Value *back() const { return Operands.back(); }
80 void pop_back() { Operands.pop_back(); }
81 bool empty() const { return Operands.empty(); }
82 // must override this
83 virtual void print(std::ostream& os) const {
84 for ( unsigned i = 0; i < size(); i++ ) {
85 os << i << " ";
86 getOperand(i)->print(os);
87 os << "\n";
88 }
89 }
90 };
91
92 /// @brief A 2 dimensional table of values
93 typedef std::vector<ValueList*> ValueTable;
94
95 /// This map is needed so that forward references to constants can be looked
96 /// up by Type and slot number when resolving those references.
97 /// @brief A mapping of a Type/slot pair to a Constant*.
98 typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
99
/// Records where a function lives in the data stream so that it can be
/// read in lazily at a later time. Lazy read-in is used mostly by the JIT,
/// which only wants to resolve functions as it needs them.
/// @brief Keeps pointers to function contents for later use.
struct LazyFunctionInfo {
  const unsigned char *Buf;    ///< Begin pointer of the function in the buffer
  const unsigned char *EndBuf; ///< End pointer of the function in the buffer

  /// Both pointers default to null when not supplied.
  LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) {
    Buf = B;
    EndBuf = EB;
  }
};
110
111 /// @brief A mapping of functions to their LazyFunctionInfo for lazy reading.
112 typedef std::map<Function*, LazyFunctionInfo> LazyFunctionMap;
113
114 /// @brief A list of global variables and the slot number that initializes
115 /// them.
116 typedef std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitsList;
117
118 /// This type maps a typeslot/valueslot pair to the corresponding Value*.
119 /// It is used for dealing with forward references as values are read in.
120 /// @brief A map for dealing with forward references of values.
121 typedef std::map<std::pair<unsigned,unsigned>,Value*> ForwardReferenceMap;
122
123/// @}
124/// @name Methods
125/// @{
126public:
127 /// This function completely parses a bytecode buffer given by the \p Buf
128 /// and \p Length parameters.
129 /// @brief Main interface to parsing a bytecode buffer.
130 void ParseBytecode(
131 const unsigned char *Buf, ///< Beginning of the bytecode buffer
132 unsigned Length, ///< Length of the bytecode buffer
133 const std::string &ModuleID ///< An identifier for the module constructed.
134 );
135
136 /// The ParseAllFunctionBodies method parses through all the previously
137 /// unparsed functions in the bytecode file. If you want to completely parse
138 /// a bytecode file, this method should be called after ParseBytecode because
139 /// ParseBytecode only records the locations in the bytecode file of where
140 /// the function definitions are located. This function uses that information
141 /// to materialize the functions.
142 /// @see ParseBytecode
143 /// @brief Parse all function bodies
144 void ParseAllFunctionBodies ();
145
146 /// The ParseFunction method lazily parses one function. Use this method to
147 /// cause the parser to parse a specific function in the module. Note that
148 /// this will remove the function from what is to be included by
149 /// ParseAllFunctionBodies.
150 /// @see ParseAllFunctionBodies
151 /// @see ParseBytecode
152 /// @brief Parse the next function of specific type
153 void ParseFunction (Function* Func) ;
154
155 /// This method is abstract in the parent ModuleProvider class. Its
156 /// implementation is identical to the ParseFunction method.
157 /// @see ParseFunction
158 /// @brief Make a specific function materialize.
159 virtual void materializeFunction(Function *F) {
160 LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(F);
161 if (Fi == LazyFunctionLoadMap.end()) return;
162 ParseFunction(F);
163 }
164
165 /// This method is abstract in the parent ModuleProvider class. Its
166 /// implementation is identical to ParseAllFunctionBodies.
167 /// @see ParseAllFunctionBodies
168 /// @brief Make the whole module materialize
169 virtual Module* materializeModule() {
170 ParseAllFunctionBodies();
171 return TheModule;
172 }
173
174 /// This method is provided by the parent ModuleProvde class and overriden
175 /// here. It simply releases the module from its provided and frees up our
176 /// state.
177 /// @brief Release our hold on the generated module
178 Module* releaseModule() {
179 // Since we're losing control of this Module, we must hand it back complete
180 Module *M = ModuleProvider::releaseModule();
181 freeState();
182 return M;
183 }
184
185/// @}
186/// @name Parsing Units For Subclasses
187/// @{
188protected:
189 /// @brief Parse whole module scope
190 void ParseModule();
191
192 /// @brief Parse the version information block
193 void ParseVersionInfo();
194
195 /// @brief Parse the ModuleGlobalInfo block
196 void ParseModuleGlobalInfo();
197
198 /// @brief Parse a symbol table
199 void ParseSymbolTable( Function* Func, SymbolTable *ST);
200
201 /// This function parses LLVM functions lazily. It obtains the type of the
202 /// function and records where the body of the function is in the bytecode
203 /// buffer. The caller can then use the ParseFunction and
204 /// ParseAllFunctionBodies to get handler events for the functions.
205 /// @brief Parse functions lazily.
206 void ParseFunctionLazily();
207
208 /// @brief Parse a function body
209 void ParseFunctionBody(Function* Func);
210
211 /// @brief Parse a compaction table
212 void ParseCompactionTable();
213
214 /// @brief Parse global types
215 void ParseGlobalTypes();
216
217 /// @returns The basic block constructed.
218 /// @brief Parse a basic block (for LLVM 1.0 basic block blocks)
219 BasicBlock* ParseBasicBlock(unsigned BlockNo);
220
221 /// @returns The number of basic blocks encountered.
222 /// @brief parse an instruction list (for post LLVM 1.0 instruction lists
223 /// with blocks differentiated by terminating instructions).
224 unsigned ParseInstructionList(
225 Function* F ///< The function into which BBs will be inserted
226 );
227
228 /// This method parses a single instruction. The instruction is
229 /// inserted at the end of the \p BB provided. The arguments of
230 /// the instruction are provided in the \p Args vector.
231 /// @brief Parse a single instruction.
232 void ParseInstruction(
233 std::vector<unsigned>& Args, ///< The arguments to be filled in
234 BasicBlock* BB ///< The BB the instruction goes in
235 );
236
237 /// @brief Parse the whole constant pool
238 void ParseConstantPool(ValueTable& Values, TypeListTy& Types);
239
240 /// @brief Parse a single constant value
241 Constant* ParseConstantValue(unsigned TypeID);
242
243 /// @brief Parse a block of type constants
244 void ParseTypeConstants(TypeListTy &Tab, unsigned NumEntries);
245
246 /// @brief Parse a single type constant
247 const Type *ParseTypeConstant();
248
249 /// @brief Parse a string constants block
250 void ParseStringConstants(unsigned NumEntries, ValueTable &Tab);
251
252/// @}
253/// @name Data
254/// @{
255private:
256 BufPtr MemStart; ///< Start of the memory buffer
257 BufPtr MemEnd; ///< End of the memory buffer
258 BufPtr BlockStart; ///< Start of current block being parsed
259 BufPtr BlockEnd; ///< End of current block being parsed
260 BufPtr At; ///< Where we're currently parsing at
261
262 // Information about the module, extracted from the bytecode revision number.
263 unsigned char RevisionNum; // The rev # itself
264
265 // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
266
267 // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
268 // block. This was fixed to be like all other blocks in 1.2
269 bool hasInconsistentModuleGlobalInfo;
270
271 // Revision #0 also explicitly encoded zero values for primitive types like
272 // int/sbyte/etc.
273 bool hasExplicitPrimitiveZeros;
274
275 // Flags to control features specific to LLVM 1.2 and before (revision #1)
276
277 // LLVM 1.2 and earlier required that getelementptr structure indices were
278 // ubyte constants and that sequential type indices were longs.
279 bool hasRestrictedGEPTypes;
280
281 /// CompactionTypes - If a compaction table is active in the current function,
282 /// this is the type mapping that it contains.
283 std::vector<const Type*> CompactionTypes;
284
285 /// @brief If a compaction table is active in the current function,
286 /// this is the mapping that it contains.
287 std::vector<std::vector<Value*> > CompactionValues;
288
289 /// @brief This vector is used to deal with forward references to types in
290 /// a module.
291 TypeListTy ModuleTypes;
292
293 /// @brief This vector is used to deal with forward references to types in
294 /// a function.
295 TypeListTy FunctionTypes;
296
297 /// When the ModuleGlobalInfo section is read, we create a Function object
298 /// for each function in the module. When the function is loaded, after the
299 /// module global info is read, this Function is populated. Until then, the
300 /// functions in this vector just hold the function signature.
301 std::vector<Function*> FunctionSignatureList;
302
303 /// @brief This is the table of values belonging to the current function
304 ValueTable FunctionValues;
305
306 /// @brief This is the table of values belonging to the module (global)
307 ValueTable ModuleValues;
308
309 /// @brief This keeps track of function level forward references.
310 ForwardReferenceMap ForwardReferences;
311
312 /// @brief The basic blocks we've parsed, while parsing a function.
313 std::vector<BasicBlock*> ParsedBasicBlocks;
314
315 /// This maintains a mapping between <Type, Slot #>'s and
316 /// forward references to constants. Such values may be referenced before they
317 /// are defined, and if so, the temporary object that they represent is held
318 /// here.
319 /// @brief Temporary place for forward references to constants.
320 ConstantRefsType ConstantFwdRefs;
321
322 /// Constant values are read in after global variables. Because of this, we
323 /// must defer setting the initializers on global variables until after module
324 /// level constants have been read. In the mean time, this list keeps track of
325 /// what we must do.
326 GlobalInitsList GlobalInits;
327
328 // For lazy reading-in of functions, we need to save away the location of
329 // each function's body: its begin and end pointer in the buffer
330 // (see LazyFunctionInfo).
331 LazyFunctionMap LazyFunctionLoadMap;
332
333 /// This stores the parser's handler which is used for handling tasks other
334 /// than just reading bytecode into the IR. If this is non-null, calls on
335 /// the (polymorphic) BytecodeHandler interface (see llvm/Bytecode/Handler.h)
336 /// will be made to report the logical structure of the bytecode file. What
337 /// the handler does with the events it receives is completely orthogonal to
338 /// the business of parsing the bytecode and building the IR. This is used,
339 /// for example, by the llvm-abcd tool for analysis of byte code.
340 /// @brief Handler for parsing events.
341 BytecodeHandler* Handler;
342
343/// @}
344/// @name Implementation Details
345/// @{
346private:
347 /// @brief Determines if this module has a function or not.
348 bool hasFunctions() { return ! FunctionSignatureList.empty(); }
349
350 /// @brief Determines if the type id has an implicit null value.
351 bool hasImplicitNull(unsigned TyID );
352
353 /// @brief Converts a type slot number to its Type*
354 const Type *getType(unsigned ID);
355
356 /// @brief Converts a Type* to its type slot number
357 unsigned getTypeSlot(const Type *Ty);
358
359 /// @brief Converts a normal type slot number to a compacted type slot num.
360 unsigned getCompactionTypeSlot(unsigned type);
361
362 /// This is just like getType, but when a compaction table is in use, it is
363 /// ignored. Also, no forward references or other fancy features are
364 /// supported.
365 const Type *getGlobalTableType(unsigned Slot);
366
367 /// This is just like getTypeSlot, but when a compaction table is in use,
368 /// it is ignored.
369 unsigned getGlobalTableTypeSlot(const Type *Ty);
370
371 /// Retrieve a value of a given type and slot number, possibly creating
372 /// it if it doesn't already exist.
373 Value* getValue(unsigned TypeID, unsigned num, bool Create = true);
374
375 /// This is just like getValue, but when a compaction table is in use, it
376 /// is ignored. Also, no forward references or other fancy features are
377 /// supported.
378 Value *getGlobalTableValue(const Type *Ty, unsigned SlotNo);
379
380 /// This function is used when constructing phi, br, switch, and other
381 /// instructions that reference basic blocks. Blocks are numbered
382 /// sequentially as they appear in the function.
383 /// @brief Get a basic block for current function
384 BasicBlock *getBasicBlock(unsigned ID);
385
386 /// Just like getValue, except that it returns a null pointer
387 /// only on error. It always returns a constant (meaning that if the value is
388 /// defined, but is not a constant, that is an error). If the specified
389 /// constant hasn't been parsed yet, a placeholder is defined and used.
390 /// Later, after the real value is parsed, the placeholder is eliminated.
391 Constant* getConstantValue(unsigned typeSlot, unsigned valSlot);
392
393 /// @brief Convenience function for getting a constant value when
394 /// the Type has already been resolved.
395 Constant* getConstantValue(const Type *Ty, unsigned valSlot) {
396 return getConstantValue(getTypeSlot(Ty), valSlot);
397 }
398
399 /// As values are created, they are inserted into the appropriate place
400 /// with this method. The ValueTable argument must be one of ModuleValues
401 /// or FunctionValues data members of this class.
402 /// @brief Insert a newly created value
403 unsigned insertValue(Value *V, unsigned Type, ValueTable &Table);
404
405 /// @brief Insert the arguments of a function.
406 void insertArguments(Function* F );
407
408 /// @brief Resolve all references to the placeholder (if any) for the
409 /// given constant.
410 void ResolveReferencesToConstant(Constant *C, unsigned Slot);
411
412 /// @brief Release our memory.
413 void freeState() {
414 freeTable(FunctionValues);
415 freeTable(ModuleValues);
416 }
417
418 /// @brief Free a table, making sure to free the ValueList in the table.
419 void freeTable(ValueTable &Tab) {
420 while (!Tab.empty()) {
421 delete Tab.back();
422 Tab.pop_back();
423 }
424 }
425
426 BytecodeReader(const BytecodeReader &); // DO NOT IMPLEMENT
427 void operator=(const BytecodeReader &); // DO NOT IMPLEMENT
428
429/// @}
430/// @name Reader Primitives
431/// @{
432private:
433
434 /// @brief Is there more to parse in the current block?
435 inline bool moreInBlock();
436
437 /// @brief Have we read past the end of the block
438 inline void checkPastBlockEnd(const char * block_name);
439
440 /// @brief Align to 32 bits
441 inline void align32();
442
443 /// @brief Read an unsigned integer as 32-bits
444 inline unsigned read_uint();
445
446 /// @brief Read an unsigned integer with variable bit rate encoding
447 inline unsigned read_vbr_uint();
448
449 /// @brief Read an unsigned 64-bit integer with variable bit rate encoding.
450 inline uint64_t read_vbr_uint64();
451
452 /// @brief Read a signed 64-bit integer with variable bit rate encoding.
453 inline int64_t read_vbr_int64();
454
455 /// @brief Read a string
456 inline std::string read_str();
457
458 /// @brief Read an arbitrary data chunk of fixed length
459 inline void read_data(void *Ptr, void *End);
460
461 /// Read a bytecode block header
462 inline void read_block(unsigned &Type, unsigned &Size);
463
464/// @}
465};
466
467} // End llvm namespace
468
469// vim: sw=2
470#endif