Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 1 | //===-- Reader.h - Interface To Bytecode Reading ----------------*- C++ -*-===// |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 2 | // |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 5 | // This file was developed by Reid Spencer and is distributed under the |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 6 | // University of Illinois Open Source License. See LICENSE.TXT for details. |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 7 | // |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 10 | // This header file defines the interface to the Bytecode Reader which is |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 11 | // responsible for correctly interpreting bytecode files (backwards compatible) |
| 12 | // and materializing a module from the bytecode read. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef BYTECODE_PARSER_H |
| 17 | #define BYTECODE_PARSER_H |
| 18 | |
| 19 | #include "llvm/Constants.h" |
| 20 | #include "llvm/DerivedTypes.h" |
| 21 | #include "llvm/GlobalValue.h" |
| 22 | #include "llvm/Function.h" |
| 23 | #include "llvm/ModuleProvider.h" |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 24 | #include "llvm/Bytecode/Analyzer.h" |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 25 | #include <utility> |
| 26 | #include <map> |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 27 | #include <setjmp.h> |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 28 | |
| 29 | namespace llvm { |
| 30 | |
| 31 | class BytecodeHandler; ///< Forward declare the handler interface |
| 32 | |
| 33 | /// This class defines the interface for parsing a buffer of bytecode. The |
| 34 | /// parser itself takes no action except to call the various functions of |
| 35 | /// the handler interface. The parser's sole responsibility is the correct |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 36 | /// interpretation of the bytecode buffer. The handler is responsible for |
| 37 | /// instantiating and keeping track of all values. As a convenience, the parser |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 38 | /// is responsible for materializing types and will pass them through the |
| 39 | /// handler interface as necessary. |
| 40 | /// @see BytecodeHandler |
| 41 | /// @brief Bytecode Reader interface |
| 42 | class BytecodeReader : public ModuleProvider { |
| 43 | |
| 44 | /// @name Constructors |
| 45 | /// @{ |
| 46 | public: |
| 47 | /// @brief Default constructor. By default, no handler is used. |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 48 | BytecodeReader(BytecodeHandler* h = 0) { |
Reid Spencer | d3539b8 | 2004-11-14 22:00:09 +0000 | [diff] [blame] | 49 | decompressedBlock = 0; |
Reid Spencer | 17f52c5 | 2004-11-06 23:17:23 +0000 | [diff] [blame] | 50 | Handler = h; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 51 | } |
| 52 | |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 53 | ~BytecodeReader() { |
| 54 | freeState(); |
Chris Lattner | 1992522 | 2004-11-15 21:55:06 +0000 | [diff] [blame] | 55 | if (decompressedBlock) { |
Reid Spencer | d3539b8 | 2004-11-14 22:00:09 +0000 | [diff] [blame] | 56 | ::free(decompressedBlock); |
Chris Lattner | 1992522 | 2004-11-15 21:55:06 +0000 | [diff] [blame] | 57 | decompressedBlock = 0; |
| 58 | } |
Reid Spencer | 17f52c5 | 2004-11-06 23:17:23 +0000 | [diff] [blame] | 59 | } |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 60 | |
| 61 | /// @} |
| 62 | /// @name Types |
| 63 | /// @{ |
| 64 | public: |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 65 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 66 | /// @brief A convenience type for the buffer pointer |
| 67 | typedef const unsigned char* BufPtr; |
| 68 | |
| 69 | /// @brief The type used for a vector of potentially abstract types |
| 70 | typedef std::vector<PATypeHolder> TypeListTy; |
| 71 | |
| 72 | /// This type provides a vector of Value* via the User class for |
| 73 | /// storage of Values that have been constructed when reading the |
| 74 | /// bytecode. Because of forward referencing, constant replacement |
| 75 | /// can occur so we ensure that our list of Value* is updated |
| 76 | /// properly through those transitions. This ensures that the |
| 77 | /// correct Value* is in our list when it comes time to associate |
| 78 | /// constants with global variables at the end of reading the |
| 79 | /// globals section. |
| 80 | /// @brief A list of values as a User of those Values. |
Chris Lattner | cad28bd | 2005-01-29 00:36:19 +0000 | [diff] [blame] | 81 | class ValueList : public User { |
| 82 | std::vector<Use> Uses; |
| 83 | public: |
Chris Lattner | fea4930 | 2005-08-16 21:59:52 +0000 | [diff] [blame] | 84 | ValueList() : User(Type::VoidTy, Value::ArgumentVal, 0, 0) {} |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 85 | |
| 86 | // vector compatibility methods |
| 87 | unsigned size() const { return getNumOperands(); } |
Chris Lattner | cad28bd | 2005-01-29 00:36:19 +0000 | [diff] [blame] | 88 | void push_back(Value *V) { |
| 89 | Uses.push_back(Use(V, this)); |
| 90 | OperandList = &Uses[0]; |
| 91 | ++NumOperands; |
| 92 | } |
| 93 | Value *back() const { return Uses.back(); } |
| 94 | void pop_back() { Uses.pop_back(); --NumOperands; } |
| 95 | bool empty() const { return NumOperands == 0; } |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 96 | virtual void print(std::ostream& os) const { |
Chris Lattner | cad28bd | 2005-01-29 00:36:19 +0000 | [diff] [blame] | 97 | for (unsigned i = 0; i < size(); ++i) { |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 98 | os << i << " "; |
| 99 | getOperand(i)->print(os); |
| 100 | os << "\n"; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 101 | } |
| 102 | } |
| 103 | }; |
| 104 | |
| 105 | /// @brief A 2 dimensional table of values |
| 106 | typedef std::vector<ValueList*> ValueTable; |
| 107 | |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 108 | /// This map is needed so that forward references to constants can be looked |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 109 | /// up by Type and slot number when resolving those references. |
| 110 | /// @brief A mapping of a Type/slot pair to a Constant*. |
Chris Lattner | 389bd04 | 2004-12-09 06:19:44 +0000 | [diff] [blame] | 111 | typedef std::map<std::pair<unsigned,unsigned>, Constant*> ConstantRefsType; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 112 | |
| 113 | /// For lazy read-in of functions, we need to save the location in the |
| 114 | /// data stream where the function is located. This structure provides that |
| 115 | /// information. Lazy read-in is used mostly by the JIT which only wants to |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 116 | /// resolve functions as it needs them. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 117 | /// @brief Keeps pointers to function contents for later use. |
| 118 | struct LazyFunctionInfo { |
| 119 | const unsigned char *Buf, *EndBuf; |
| 120 | LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) |
| 121 | : Buf(B), EndBuf(EB) {} |
| 122 | }; |
| 123 | |
| 124 | /// @brief A mapping of functions to their LazyFunctionInfo for lazy reading. |
| 125 | typedef std::map<Function*, LazyFunctionInfo> LazyFunctionMap; |
| 126 | |
| 127 | /// @brief A list of global variables and the slot number that initializes |
| 128 | /// them. |
| 129 | typedef std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitsList; |
| 130 | |
| 131 | /// This type maps a typeslot/valueslot pair to the corresponding Value*. |
| 132 | /// It is used for dealing with forward references as values are read in. |
| 133 | /// @brief A map for dealing with forward references of values. |
| 134 | typedef std::map<std::pair<unsigned,unsigned>,Value*> ForwardReferenceMap; |
| 135 | |
| 136 | /// @} |
| 137 | /// @name Methods |
| 138 | /// @{ |
| 139 | public: |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 140 | /// @returns true if an error occurred |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 141 | /// @brief Main interface to parsing a bytecode buffer. |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 142 | bool ParseBytecode( |
Anton Korobeynikov | 7d51544 | 2006-09-01 20:35:17 +0000 | [diff] [blame] | 143 | volatile BufPtr Buf, ///< Beginning of the bytecode buffer |
Reid Spencer | 5c15fe5 | 2004-07-05 00:57:50 +0000 | [diff] [blame] | 144 | unsigned Length, ///< Length of the bytecode buffer |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 145 | const std::string &ModuleID, ///< An identifier for the module constructed. |
| 146 | std::string* ErrMsg = 0 ///< Optional place for error message |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 147 | ); |
| 148 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 149 | /// @brief Parse all function bodies |
Reid Spencer | 99655e1 | 2006-08-25 19:54:53 +0000 | [diff] [blame] | 150 | bool ParseAllFunctionBodies(std::string* ErrMsg); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 151 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 152 | /// @brief Parse the next function of specific type |
Reid Spencer | 99655e1 | 2006-08-25 19:54:53 +0000 | [diff] [blame] | 153 | bool ParseFunction(Function* Func, std::string* ErrMsg) ; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 154 | |
| 155 | /// This method is abstract in the parent ModuleProvider class. Its |
| 156 | /// implementation is identical to the ParseFunction method. |
| 157 | /// @see ParseFunction |
| 158 | /// @brief Make a specific function materialize. |
Reid Spencer | 99655e1 | 2006-08-25 19:54:53 +0000 | [diff] [blame] | 159 | virtual bool materializeFunction(Function *F, std::string *ErrMsg = 0) { |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 160 | LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(F); |
Reid Spencer | 99655e1 | 2006-08-25 19:54:53 +0000 | [diff] [blame] | 161 | if (Fi == LazyFunctionLoadMap.end()) |
| 162 | return false; |
| 163 | if (ParseFunction(F,ErrMsg)) |
Chris Lattner | 0300f3e | 2006-07-06 21:35:01 +0000 | [diff] [blame] | 164 | return true; |
Chris Lattner | 0300f3e | 2006-07-06 21:35:01 +0000 | [diff] [blame] | 165 | return false; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 166 | } |
| 167 | |
| 168 | /// This method is abstract in the parent ModuleProvider class. Its |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 169 | /// implementation is identical to ParseAllFunctionBodies. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 170 | /// @see ParseAllFunctionBodies |
| 171 | /// @brief Make the whole module materialize |
Reid Spencer | 99655e1 | 2006-08-25 19:54:53 +0000 | [diff] [blame] | 172 | virtual Module* materializeModule(std::string *ErrMsg = 0) { |
| 173 | if (ParseAllFunctionBodies(ErrMsg)) |
Chris Lattner | 0300f3e | 2006-07-06 21:35:01 +0000 | [diff] [blame] | 174 | return 0; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 175 | return TheModule; |
| 176 | } |
| 177 | |
  /// This method is provided by the parent ModuleProvider class and overridden
  /// here. It simply releases the module from its provider and frees up our
  /// state.
| 181 | /// @brief Release our hold on the generated module |
Chris Lattner | 94aa7f3 | 2006-07-07 06:06:06 +0000 | [diff] [blame] | 182 | Module* releaseModule(std::string *ErrInfo = 0) { |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 183 | // Since we're losing control of this Module, we must hand it back complete |
Reid Spencer | 4952143 | 2006-11-11 11:54:25 +0000 | [diff] [blame] | 184 | Module *M = ModuleProvider::releaseModule(ErrInfo); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 185 | freeState(); |
| 186 | return M; |
| 187 | } |
| 188 | |
| 189 | /// @} |
| 190 | /// @name Parsing Units For Subclasses |
| 191 | /// @{ |
| 192 | protected: |
| 193 | /// @brief Parse whole module scope |
| 194 | void ParseModule(); |
| 195 | |
| 196 | /// @brief Parse the version information block |
| 197 | void ParseVersionInfo(); |
| 198 | |
| 199 | /// @brief Parse the ModuleGlobalInfo block |
| 200 | void ParseModuleGlobalInfo(); |
| 201 | |
| 202 | /// @brief Parse a symbol table |
| 203 | void ParseSymbolTable( Function* Func, SymbolTable *ST); |
| 204 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 205 | /// @brief Parse functions lazily. |
| 206 | void ParseFunctionLazily(); |
| 207 | |
| 208 | /// @brief Parse a function body |
| 209 | void ParseFunctionBody(Function* Func); |
| 210 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 211 | /// @brief Parse the type list portion of a compaction table |
Chris Lattner | 45b5dd2 | 2004-08-03 23:41:28 +0000 | [diff] [blame] | 212 | void ParseCompactionTypes(unsigned NumEntries); |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 213 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 214 | /// @brief Parse a compaction table |
| 215 | void ParseCompactionTable(); |
| 216 | |
| 217 | /// @brief Parse global types |
| 218 | void ParseGlobalTypes(); |
| 219 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 220 | /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) |
| 221 | BasicBlock* ParseBasicBlock(unsigned BlockNo); |
| 222 | |
  /// @brief Parse an instruction list (for post LLVM 1.0 instruction lists
  /// with blocks differentiated by terminating instructions).
| 225 | unsigned ParseInstructionList( |
| 226 | Function* F ///< The function into which BBs will be inserted |
| 227 | ); |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 228 | |
Reid Spencer | 1628cec | 2006-10-26 06:15:43 +0000 | [diff] [blame] | 229 | /// Convert previous opcode values into the current value and/or construct |
| 230 | /// the instruction. This function handles all *abnormal* cases for |
| 231 | /// instruction generation based on obsolete opcode values. The normal cases |
| 232 | /// are handled by the ParseInstruction function. |
Reid Spencer | 6996feb | 2006-11-08 21:27:54 +0000 | [diff] [blame] | 233 | Instruction* upgradeInstrOpcodes( |
Reid Spencer | 1628cec | 2006-10-26 06:15:43 +0000 | [diff] [blame] | 234 | unsigned &opcode, ///< The old opcode, possibly updated by this function |
| 235 | std::vector<unsigned> &Oprnds, ///< The operands to the instruction |
| 236 | unsigned &iType, ///< The type code from the bytecode file |
| 237 | const Type* InstTy, ///< The type of the instruction |
| 238 | BasicBlock* BB ///< The basic block to insert into, if we need to |
| 239 | ); |
| 240 | |
Reid Spencer | 6996feb | 2006-11-08 21:27:54 +0000 | [diff] [blame] | 241 | /// @brief Convert previous opcode values for ConstantExpr into the current |
| 242 | /// value. |
| 243 | unsigned upgradeCEOpcodes( |
| 244 | unsigned Opcode, ///< Opcode read from bytecode |
| 245 | const std::vector<Constant*> &ArgVec ///< Arguments of instruction |
| 246 | ); |
| 247 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 248 | /// @brief Parse a single instruction. |
| 249 | void ParseInstruction( |
| 250 | std::vector<unsigned>& Args, ///< The arguments to be filled in |
| 251 | BasicBlock* BB ///< The BB the instruction goes in |
| 252 | ); |
| 253 | |
| 254 | /// @brief Parse the whole constant pool |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 255 | void ParseConstantPool(ValueTable& Values, TypeListTy& Types, |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 256 | bool isFunction); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 257 | |
Chris Lattner | 3bc5a60 | 2006-01-25 23:08:15 +0000 | [diff] [blame] | 258 | /// @brief Parse a single constant pool value |
| 259 | Value *ParseConstantPoolValue(unsigned TypeID); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 260 | |
  /// @brief Parse a block of type constants
Reid Spencer | 6690651 | 2004-07-11 17:24:05 +0000 | [diff] [blame] | 262 | void ParseTypes(TypeListTy &Tab, unsigned NumEntries); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 263 | |
| 264 | /// @brief Parse a single type constant |
Reid Spencer | 6690651 | 2004-07-11 17:24:05 +0000 | [diff] [blame] | 265 | const Type *ParseType(); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 266 | |
| 267 | /// @brief Parse a string constants block |
| 268 | void ParseStringConstants(unsigned NumEntries, ValueTable &Tab); |
| 269 | |
Chris Lattner | f0edc6c | 2006-10-12 18:32:30 +0000 | [diff] [blame] | 270 | /// @brief Release our memory. |
  void freeState() {
    // Hand both value tables to freeTable(): first the table for the current
    // function, then the module-level (global) table.
    freeTable(FunctionValues);
    freeTable(ModuleValues);
  }
| 275 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 276 | /// @} |
| 277 | /// @name Data |
| 278 | /// @{ |
| 279 | private: |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 280 | std::string ErrorMsg; ///< A place to hold an error message through longjmp |
| 281 | jmp_buf context; ///< Where to return to if an error occurs. |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 282 | char* decompressedBlock; ///< Result of decompression |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 283 | BufPtr MemStart; ///< Start of the memory buffer |
| 284 | BufPtr MemEnd; ///< End of the memory buffer |
| 285 | BufPtr BlockStart; ///< Start of current block being parsed |
| 286 | BufPtr BlockEnd; ///< End of current block being parsed |
| 287 | BufPtr At; ///< Where we're currently parsing at |
| 288 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 289 | /// Information about the module, extracted from the bytecode revision number. |
Chris Lattner | 45b5dd2 | 2004-08-03 23:41:28 +0000 | [diff] [blame] | 290 | /// |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 291 | unsigned char RevisionNum; // The rev # itself |
| 292 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 293 | /// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0) |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 294 | |
Chris Lattner | 45b5dd2 | 2004-08-03 23:41:28 +0000 | [diff] [blame] | 295 | /// Revision #0 had an explicit alignment of data only for the |
| 296 | /// ModuleGlobalInfo block. This was fixed to be like all other blocks in 1.2 |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 297 | bool hasInconsistentModuleGlobalInfo; |
| 298 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 299 | /// Revision #0 also explicitly encoded zero values for primitive types like |
| 300 | /// int/sbyte/etc. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 301 | bool hasExplicitPrimitiveZeros; |
| 302 | |
  // Flags to control features specific to LLVM 1.2 and before (revision #1)
| 304 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 305 | /// LLVM 1.2 and earlier required that getelementptr structure indices were |
| 306 | /// ubyte constants and that sequential type indices were longs. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 307 | bool hasRestrictedGEPTypes; |
| 308 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 309 | /// LLVM 1.2 and earlier had class Type deriving from Value and the Type |
  /// objects were located in the "Type Type" plane of various lists read
  /// by the bytecode reader. In LLVM 1.3 this is no longer the case. Types are
| 312 | /// completely distinct from Values. Consequently, Types are written in fixed |
| 313 | /// locations in LLVM 1.3. This flag indicates that the older Type derived |
| 314 | /// from Value style of bytecode file is being read. |
| 315 | bool hasTypeDerivedFromValue; |
| 316 | |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 317 | /// LLVM 1.2 and earlier encoded block headers as two uint (8 bytes), one for |
Chris Lattner | 45b5dd2 | 2004-08-03 23:41:28 +0000 | [diff] [blame] | 318 | /// the size and one for the type. This is a bit wasteful, especially for |
| 319 | /// small files where the 8 bytes per block is a large fraction of the total |
| 320 | /// block size. In LLVM 1.3, the block type and length are encoded into a |
| 321 | /// single uint32 by restricting the number of block types (limit 31) and the |
| 322 | /// maximum size of a block (limit 2^27-1=134,217,727). Note that the module |
| 323 | /// block still uses the 8-byte format so the maximum size of a file can be |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 324 | /// 2^32-1 bytes long. |
| 325 | bool hasLongBlockHeaders; |
| 326 | |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 327 | /// LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3 |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 328 | /// this has been reduced to vbr_uint24. It shouldn't make much difference |
  /// since we haven't run into a module with > 16 million types, but for safety
| 330 | /// the 24-bit restriction has been enforced in 1.3 to free some bits in |
| 331 | /// various places and to ensure consistency. In particular, global vars are |
| 332 | /// restricted to 24-bits. |
| 333 | bool has32BitTypes; |
| 334 | |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 335 | /// LLVM 1.2 and earlier did not provide a target triple nor a list of |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 336 | /// libraries on which the bytecode is dependent. LLVM 1.3 provides these |
| 337 | /// features, for use in future versions of LLVM. |
| 338 | bool hasNoDependentLibraries; |
| 339 | |
Reid Spencer | 38d54be | 2004-08-17 07:45:14 +0000 | [diff] [blame] | 340 | /// LLVM 1.3 and earlier caused blocks and other fields to start on 32-bit |
| 341 | /// aligned boundaries. This can lead to as much as 30% bytecode size overhead |
| 342 | /// in various corner cases (lots of long instructions). In LLVM 1.4, |
| 343 | /// alignment of bytecode fields was done away with completely. |
| 344 | bool hasAlignment; |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 345 | |
Chris Lattner | a79e7cc | 2004-10-16 18:18:16 +0000 | [diff] [blame] | 346 | // In version 4 and earlier, the bytecode format did not support the 'undef' |
| 347 | // constant. |
| 348 | bool hasNoUndefValue; |
| 349 | |
| 350 | // In version 4 and earlier, the bytecode format did not save space for flags |
| 351 | // in the global info block for functions. |
| 352 | bool hasNoFlagsForFunctions; |
| 353 | |
| 354 | // In version 4 and earlier, there was no opcode space reserved for the |
| 355 | // unreachable instruction. |
| 356 | bool hasNoUnreachableInst; |
| 357 | |
Reid Spencer | 6996feb | 2006-11-08 21:27:54 +0000 | [diff] [blame] | 358 | // In version 6, the Div and Rem instructions were converted to be the |
| 359 | // signed instructions UDiv, SDiv, URem and SRem. This flag will be true if |
| 360 | // the Div and Rem instructions are signless (ver 5 and prior). |
| 361 | bool hasSignlessDivRem; |
| 362 | |
| 363 | // In version 7, the Shr, Cast and Setcc instructions changed to their |
| 364 | // signed counterparts. This flag will be true if these instructions are |
| 365 | // signless (version 6 and prior). |
| 366 | bool hasSignlessShrCastSetcc; |
Reid Spencer | 1628cec | 2006-10-26 06:15:43 +0000 | [diff] [blame] | 367 | |
Reid Spencer | 3e59546 | 2006-01-19 06:57:58 +0000 | [diff] [blame] | 368 | /// In release 1.7 we changed intrinsic functions to not be overloaded. There |
| 369 | /// is no bytecode change for this, but to optimize the auto-upgrade of calls |
Reid Spencer | e2a5fb0 | 2006-01-27 11:49:27 +0000 | [diff] [blame] | 370 | /// to intrinsic functions, we save a mapping of old function definitions to |
| 371 | /// the new ones so call instructions can be upgraded efficiently. |
| 372 | std::map<Function*,Function*> upgradedFunctions; |
Reid Spencer | 3e59546 | 2006-01-19 06:57:58 +0000 | [diff] [blame] | 373 | |
Chris Lattner | 45b5dd2 | 2004-08-03 23:41:28 +0000 | [diff] [blame] | 374 | /// CompactionTypes - If a compaction table is active in the current function, |
| 375 | /// this is the mapping that it contains. We keep track of what resolved type |
| 376 | /// it is as well as what global type entry it is. |
| 377 | std::vector<std::pair<const Type*, unsigned> > CompactionTypes; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 378 | |
| 379 | /// @brief If a compaction table is active in the current function, |
| 380 | /// this is the mapping that it contains. |
| 381 | std::vector<std::vector<Value*> > CompactionValues; |
| 382 | |
| 383 | /// @brief This vector is used to deal with forward references to types in |
| 384 | /// a module. |
| 385 | TypeListTy ModuleTypes; |
Chris Lattner | eebac5f | 2005-10-03 21:26:53 +0000 | [diff] [blame] | 386 | |
| 387 | /// @brief This is an inverse mapping of ModuleTypes from the type to an |
| 388 | /// index. Because refining types causes the index of this map to be |
| 389 | /// invalidated, any time we refine a type, we clear this cache and recompute |
| 390 | /// it next time we need it. These entries are ordered by the pointer value. |
| 391 | std::vector<std::pair<const Type*, unsigned> > ModuleTypeIDCache; |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 392 | |
| 393 | /// @brief This vector is used to deal with forward references to types in |
| 394 | /// a function. |
| 395 | TypeListTy FunctionTypes; |
| 396 | |
| 397 | /// When the ModuleGlobalInfo section is read, we create a Function object |
| 398 | /// for each function in the module. When the function is loaded, after the |
| 399 | /// module global info is read, this Function is populated. Until then, the |
| 400 | /// functions in this vector just hold the function signature. |
| 401 | std::vector<Function*> FunctionSignatureList; |
| 402 | |
| 403 | /// @brief This is the table of values belonging to the current function |
| 404 | ValueTable FunctionValues; |
| 405 | |
| 406 | /// @brief This is the table of values belonging to the module (global) |
| 407 | ValueTable ModuleValues; |
| 408 | |
| 409 | /// @brief This keeps track of function level forward references. |
| 410 | ForwardReferenceMap ForwardReferences; |
| 411 | |
| 412 | /// @brief The basic blocks we've parsed, while parsing a function. |
| 413 | std::vector<BasicBlock*> ParsedBasicBlocks; |
| 414 | |
Chris Lattner | 1c765b0 | 2004-10-14 01:49:34 +0000 | [diff] [blame] | 415 | /// This maintains a mapping between <Type, Slot #>'s and forward references |
| 416 | /// to constants. Such values may be referenced before they are defined, and |
  /// if so, the temporary object that they represent is held here.
  /// @brief Temporary place for forward references to constants.
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 419 | ConstantRefsType ConstantFwdRefs; |
| 420 | |
| 421 | /// Constant values are read in after global variables. Because of this, we |
| 422 | /// must defer setting the initializers on global variables until after module |
Chris Lattner | 1c765b0 | 2004-10-14 01:49:34 +0000 | [diff] [blame] | 423 | /// level constants have been read. In the mean time, this list keeps track |
| 424 | /// of what we must do. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 425 | GlobalInitsList GlobalInits; |
| 426 | |
| 427 | // For lazy reading-in of functions, we need to save away several pieces of |
| 428 | // information about each function: its begin and end pointer in the buffer |
| 429 | // and its FunctionSlot. |
| 430 | LazyFunctionMap LazyFunctionLoadMap; |
| 431 | |
  /// This stores the parser's handler which is used for handling tasks other
  /// than just reading bytecode into the IR. If this is non-null, calls on
| 434 | /// the (polymorphic) BytecodeHandler interface (see llvm/Bytecode/Handler.h) |
| 435 | /// will be made to report the logical structure of the bytecode file. What |
| 436 | /// the handler does with the events it receives is completely orthogonal to |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 437 | /// the business of parsing the bytecode and building the IR. This is used, |
| 438 | /// for example, by the llvm-abcd tool for analysis of byte code. |
| 439 | /// @brief Handler for parsing events. |
| 440 | BytecodeHandler* Handler; |
| 441 | |
Reid Spencer | 3e59546 | 2006-01-19 06:57:58 +0000 | [diff] [blame] | 442 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 443 | /// @} |
| 444 | /// @name Implementation Details |
| 445 | /// @{ |
| 446 | private: |
| 447 | /// @brief Determines if this module has a function or not. |
| 448 | bool hasFunctions() { return ! FunctionSignatureList.empty(); } |
| 449 | |
| 450 | /// @brief Determines if the type id has an implicit null value. |
| 451 | bool hasImplicitNull(unsigned TyID ); |
| 452 | |
| 453 | /// @brief Converts a type slot number to its Type* |
| 454 | const Type *getType(unsigned ID); |
| 455 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 456 | /// @brief Converts a pre-sanitized type slot number to its Type* and |
| 457 | /// sanitizes the type id. |
| 458 | inline const Type* getSanitizedType(unsigned& ID ); |
| 459 | |
| 460 | /// @brief Read in and get a sanitized type id |
Chris Lattner | 1992522 | 2004-11-15 21:55:06 +0000 | [diff] [blame] | 461 | inline const Type* readSanitizedType(); |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 462 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 463 | /// @brief Converts a Type* to its type slot number |
| 464 | unsigned getTypeSlot(const Type *Ty); |
| 465 | |
| 466 | /// @brief Converts a normal type slot number to a compacted type slot num. |
| 467 | unsigned getCompactionTypeSlot(unsigned type); |
| 468 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 469 | /// @brief Gets the global type corresponding to the TypeId |
| 470 | const Type *getGlobalTableType(unsigned TypeId); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 471 | |
| 472 | /// This is just like getTypeSlot, but when a compaction table is in use, |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 473 | /// it is ignored. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 474 | unsigned getGlobalTableTypeSlot(const Type *Ty); |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 475 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 476 | /// @brief Get a value from its typeid and slot number |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 477 | Value* getValue(unsigned TypeID, unsigned num, bool Create = true); |
| 478 | |
Chris Lattner | 2c6c14d | 2004-08-04 00:19:23 +0000 | [diff] [blame] | 479 | /// @brief Get a value from its type and slot number, ignoring compaction |
| 480 | /// tables. |
| 481 | Value *getGlobalTableValue(unsigned TyID, unsigned SlotNo); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 482 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 483 | /// @brief Get a basic block for current function |
| 484 | BasicBlock *getBasicBlock(unsigned ID); |
| 485 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 486 | /// @brief Get a constant value from its typeid and value slot. |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 487 | Constant* getConstantValue(unsigned typeSlot, unsigned valSlot); |
| 488 | |
| 489 | /// @brief Convenience function for getting a constant value when |
| 490 | /// the Type has already been resolved. |
| 491 | Constant* getConstantValue(const Type *Ty, unsigned valSlot) { |
| 492 | return getConstantValue(getTypeSlot(Ty), valSlot); |
| 493 | } |
| 494 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 495 | /// @brief Insert a newly created value |
| 496 | unsigned insertValue(Value *V, unsigned Type, ValueTable &Table); |
| 497 | |
| 498 | /// @brief Insert the arguments of a function. |
| 499 | void insertArguments(Function* F ); |
| 500 | |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 501 | /// @brief Resolve all references to the placeholder (if any) for the |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 502 | /// given constant. |
Chris Lattner | 389bd04 | 2004-12-09 06:19:44 +0000 | [diff] [blame] | 503 | void ResolveReferencesToConstant(Constant *C, unsigned Typ, unsigned Slot); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 504 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 505 | /// @brief Free a table, making sure to free the ValueList in the table. |
| 506 | void freeTable(ValueTable &Tab) { |
| 507 | while (!Tab.empty()) { |
| 508 | delete Tab.back(); |
| 509 | Tab.pop_back(); |
| 510 | } |
| 511 | } |
| 512 | |
Reid Spencer | 233fe72 | 2006-08-22 16:09:19 +0000 | [diff] [blame] | 513 | inline void error(const std::string& errmsg); |
Reid Spencer | 2439972 | 2004-07-09 22:21:33 +0000 | [diff] [blame] | 514 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 515 | BytecodeReader(const BytecodeReader &); // DO NOT IMPLEMENT |
| 516 | void operator=(const BytecodeReader &); // DO NOT IMPLEMENT |
| 517 | |
| 518 | /// @} |
| 519 | /// @name Reader Primitives |
| 520 | /// @{ |
| 521 | private: |
| 522 | |
| 523 | /// @brief Is there more to parse in the current block? |
| 524 | inline bool moreInBlock(); |
| 525 | |
| 526 | /// @brief Have we read past the end of the block? |
| 527 | inline void checkPastBlockEnd(const char * block_name); |
| 528 | |
| 529 | /// @brief Align to 32 bits |
| 530 | inline void align32(); |
| 531 | |
| 532 | /// @brief Read an unsigned integer as 32-bits |
| 533 | inline unsigned read_uint(); |
| 534 | |
| 535 | /// @brief Read an unsigned integer with variable bit rate encoding |
| 536 | inline unsigned read_vbr_uint(); |
| 537 | |
Reid Spencer | ad89bd6 | 2004-07-25 18:07:36 +0000 | [diff] [blame] | 538 | /// @brief Read an unsigned integer of no more than 24-bits with variable |
| 539 | /// bit rate encoding. |
| 540 | inline unsigned read_vbr_uint24(); |
| 541 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 542 | /// @brief Read an unsigned 64-bit integer with variable bit rate encoding. |
| 543 | inline uint64_t read_vbr_uint64(); |
| 544 | |
| 545 | /// @brief Read a signed 64-bit integer with variable bit rate encoding. |
| 546 | inline int64_t read_vbr_int64(); |
| 547 | |
| 548 | /// @brief Read a string |
| 549 | inline std::string read_str(); |
| 550 | |
Reid Spencer | 6690651 | 2004-07-11 17:24:05 +0000 | [diff] [blame] | 551 | /// @brief Read a float value |
| 552 | inline void read_float(float& FloatVal); |
| 553 | |
| 554 | /// @brief Read a double value |
| 555 | inline void read_double(double& DoubleVal); |
| 556 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 557 | /// @brief Read an arbitrary data chunk of fixed length |
| 558 | inline void read_data(void *Ptr, void *End); |
| 559 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 560 | /// @brief Read a bytecode block header |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 561 | inline void read_block(unsigned &Type, unsigned &Size); |
| 562 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 563 | /// @brief Read a type identifier and sanitize it. |
| 564 | inline bool read_typeid(unsigned &TypeId); |
| 565 | |
| 566 | /// @brief Recalculate type ID for pre-1.3 bytecode files. |
| 567 | inline bool sanitizeTypeId(unsigned &TypeId ); |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 568 | /// @} |
| 569 | }; |
| 570 | |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 571 | /// @brief A function for creating a BytecodeAnalyzer as a handler |
| 572 | /// for the Bytecode reader. |
Misha Brukman | 8a96c53 | 2005-04-21 21:44:41 +0000 | [diff] [blame] | 573 | BytecodeHandler* createBytecodeAnalyzerHandler(BytecodeAnalysis& bca, |
Reid Spencer | 572c256 | 2004-08-21 20:50:49 +0000 | [diff] [blame] | 574 | std::ostream* output ); |
Reid Spencer | a86159c | 2004-07-04 11:04:56 +0000 | [diff] [blame] | 575 | |
| 576 | |
Reid Spencer | f89143c | 2004-06-29 23:31:01 +0000 | [diff] [blame] | 577 | } // End llvm namespace |
| 578 | |
| 579 | // vim: sw=2 |
| 580 | #endif |