//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the parser class for .ll files.
//
//===----------------------------------------------------------------------===//
| 13 | |
| 14 | #ifndef LLVM_ASMPARSER_LLPARSER_H |
| 15 | #define LLVM_ASMPARSER_LLPARSER_H |
| 16 | |
| 17 | #include "LLLexer.h" |
| 18 | #include "llvm/Module.h" |
| 19 | #include "llvm/Type.h" |
| 20 | #include "llvm/Support/ValueHandle.h" |
| 21 | #include <map> |
| 22 | |
| 23 | namespace llvm { |
| 24 | class Module; |
| 25 | class OpaqueType; |
| 26 | class Function; |
| 27 | class Value; |
| 28 | class BasicBlock; |
| 29 | class Instruction; |
| 30 | class Constant; |
| 31 | class GlobalValue; |
| 32 | class MDString; |
| 33 | class MDNode; |
| 34 | |
| 35 | /// ValID - Represents a reference of a definition of some sort with no type. |
| 36 | /// There are several cases where we have to parse the value but where the |
| 37 | /// type can depend on later context. This may either be a numeric reference |
| 38 | /// or a symbolic (%var) reference. This is just a discriminated union. |
| 39 | struct ValID { |
| 40 | enum { |
| 41 | t_LocalID, t_GlobalID, // ID in UIntVal. |
| 42 | t_LocalName, t_GlobalName, // Name in StrVal. |
| 43 | t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal. |
| 44 | t_Null, t_Undef, t_Zero, // No value. |
| 45 | t_EmptyArray, // No value: [] |
| 46 | t_Constant, // Value in ConstantVal. |
| 47 | t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. |
| 48 | t_MDNode, // Value in MDNodeVal. |
| 49 | t_MDString // Value in MDStringVal. |
| 50 | } Kind; |
| 51 | |
| 52 | LLLexer::LocTy Loc; |
| 53 | unsigned UIntVal; |
| 54 | std::string StrVal, StrVal2; |
| 55 | APSInt APSIntVal; |
| 56 | APFloat APFloatVal; |
| 57 | Constant *ConstantVal; |
| 58 | MDNode *MDNodeVal; |
| 59 | MDString *MDStringVal; |
| 60 | ValID() : APFloatVal(0.0) {} |
| 61 | |
| 62 | bool operator<(const ValID &RHS) const { |
| 63 | if (Kind == t_LocalID || Kind == t_GlobalID) |
| 64 | return UIntVal < RHS.UIntVal; |
| 65 | assert((Kind == t_LocalName || Kind == t_GlobalName) && |
| 66 | "Ordering not defined for this ValID kind yet"); |
| 67 | return StrVal < RHS.StrVal; |
| 68 | } |
| 69 | }; |
| 70 | |
| 71 | class LLParser { |
| 72 | public: |
| 73 | typedef LLLexer::LocTy LocTy; |
| 74 | private: |
| 75 | LLVMContext& Context; |
| 76 | LLLexer Lex; |
| 77 | Module *M; |
| 78 | |
| 79 | // Type resolution handling data structures. |
| 80 | std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes; |
| 81 | std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs; |
| 82 | std::vector<PATypeHolder> NumberedTypes; |
| 83 | std::vector<TrackingVH<MDNode> > NumberedMetadata; |
| 84 | std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes; |
| 85 | struct UpRefRecord { |
| 86 | /// Loc - This is the location of the upref. |
| 87 | LocTy Loc; |
| 88 | |
| 89 | /// NestingLevel - The number of nesting levels that need to be popped |
| 90 | /// before this type is resolved. |
| 91 | unsigned NestingLevel; |
| 92 | |
| 93 | /// LastContainedTy - This is the type at the current binding level for |
| 94 | /// the type. Every time we reduce the nesting level, this gets updated. |
| 95 | const Type *LastContainedTy; |
| 96 | |
| 97 | /// UpRefTy - This is the actual opaque type that the upreference is |
| 98 | /// represented with. |
| 99 | OpaqueType *UpRefTy; |
| 100 | |
| 101 | UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy) |
| 102 | : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy), |
| 103 | UpRefTy(URTy) {} |
| 104 | }; |
| 105 | std::vector<UpRefRecord> UpRefs; |
| 106 | |
| 107 | // Global Value reference information. |
| 108 | std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals; |
| 109 | std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; |
| 110 | std::vector<GlobalValue*> NumberedVals; |
| 111 | |
| 112 | // References to blockaddress. The key is the function ValID, the value is |
| 113 | // a list of references to blocks in that function. |
| 114 | std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > > |
| 115 | ForwardRefBlockAddresses; |
| 116 | |
| 117 | Function *MallocF; |
| 118 | public: |
| 119 | LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : |
| 120 | Context(m->getContext()), Lex(F, SM, Err, m->getContext()), |
| 121 | M(m), MallocF(NULL) {} |
| 122 | bool Run(); |
| 123 | |
| 124 | LLVMContext& getContext() { return Context; } |
| 125 | |
| 126 | private: |
| 127 | |
| 128 | bool Error(LocTy L, const std::string &Msg) const { |
| 129 | return Lex.Error(L, Msg); |
| 130 | } |
| 131 | bool TokError(const std::string &Msg) const { |
| 132 | return Error(Lex.getLoc(), Msg); |
| 133 | } |
| 134 | |
| 135 | /// GetGlobalVal - Get a value with the specified name or ID, creating a |
| 136 | /// forward reference record if needed. This can return null if the value |
| 137 | /// exists but does not have the right type. |
| 138 | GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc); |
| 139 | GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc); |
| 140 | |
| 141 | // Helper Routines. |
| 142 | bool ParseToken(lltok::Kind T, const char *ErrMsg); |
| 143 | bool EatIfPresent(lltok::Kind T) { |
| 144 | if (Lex.getKind() != T) return false; |
| 145 | Lex.Lex(); |
| 146 | return true; |
| 147 | } |
| 148 | bool ParseOptionalToken(lltok::Kind T, bool &Present) { |
| 149 | if (Lex.getKind() != T) { |
| 150 | Present = false; |
| 151 | } else { |
| 152 | Lex.Lex(); |
| 153 | Present = true; |
| 154 | } |
| 155 | return false; |
| 156 | } |
| 157 | bool ParseStringConstant(std::string &Result); |
| 158 | bool ParseUInt32(unsigned &Val); |
| 159 | bool ParseUInt32(unsigned &Val, LocTy &Loc) { |
| 160 | Loc = Lex.getLoc(); |
| 161 | return ParseUInt32(Val); |
| 162 | } |
| 163 | bool ParseOptionalAddrSpace(unsigned &AddrSpace); |
| 164 | bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind); |
| 165 | bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); |
| 166 | bool ParseOptionalLinkage(unsigned &Linkage) { |
| 167 | bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); |
| 168 | } |
| 169 | bool ParseOptionalVisibility(unsigned &Visibility); |
| 170 | bool ParseOptionalCallingConv(CallingConv::ID &CC); |
| 171 | bool ParseOptionalAlignment(unsigned &Alignment); |
| 172 | bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, |
| 173 | MDNode *> > &); |
| 174 | bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); |
| 175 | bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); |
| 176 | bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) { |
| 177 | bool AteExtraComma; |
| 178 | if (ParseIndexList(Indices, AteExtraComma)) return true; |
| 179 | if (AteExtraComma) |
| 180 | return TokError("expected index"); |
| 181 | return false; |
| 182 | } |
| 183 | |
| 184 | // Top-Level Entities |
| 185 | bool ParseTopLevelEntities(); |
| 186 | bool ValidateEndOfModule(); |
| 187 | bool ParseTargetDefinition(); |
| 188 | bool ParseDepLibs(); |
| 189 | bool ParseModuleAsm(); |
| 190 | bool ParseUnnamedType(); |
| 191 | bool ParseNamedType(); |
| 192 | bool ParseDeclare(); |
| 193 | bool ParseDefine(); |
| 194 | |
| 195 | bool ParseGlobalType(bool &IsConstant); |
| 196 | bool ParseUnnamedGlobal(); |
| 197 | bool ParseNamedGlobal(); |
| 198 | bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, |
| 199 | bool HasLinkage, unsigned Visibility); |
| 200 | bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); |
| 201 | bool ParseStandaloneMetadata(); |
| 202 | bool ParseNamedMetadata(); |
| 203 | bool ParseMDString(MDString *&Result); |
| 204 | bool ParseMDNodeID(MDNode *&Result); |
| 205 | |
| 206 | // Type Parsing. |
| 207 | bool ParseType(PATypeHolder &Result, bool AllowVoid = false); |
| 208 | bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) { |
| 209 | Loc = Lex.getLoc(); |
| 210 | return ParseType(Result, AllowVoid); |
| 211 | } |
| 212 | bool ParseTypeRec(PATypeHolder &H); |
| 213 | bool ParseStructType(PATypeHolder &H, bool Packed); |
| 214 | bool ParseArrayVectorType(PATypeHolder &H, bool isVector); |
| 215 | bool ParseFunctionType(PATypeHolder &Result); |
| 216 | PATypeHolder HandleUpRefs(const Type *Ty); |
| 217 | |
| 218 | // Function Semantic Analysis. |
| 219 | class PerFunctionState { |
| 220 | LLParser &P; |
| 221 | Function &F; |
| 222 | std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals; |
| 223 | std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs; |
| 224 | std::vector<Value*> NumberedVals; |
| 225 | |
| 226 | /// FunctionNumber - If this is an unnamed function, this is the slot |
| 227 | /// number of it, otherwise it is -1. |
| 228 | int FunctionNumber; |
| 229 | public: |
| 230 | PerFunctionState(LLParser &p, Function &f, int FunctionNumber); |
| 231 | ~PerFunctionState(); |
| 232 | |
| 233 | Function &getFunction() const { return F; } |
| 234 | |
| 235 | bool FinishFunction(); |
| 236 | |
| 237 | /// GetVal - Get a value with the specified name or ID, creating a |
| 238 | /// forward reference record if needed. This can return null if the value |
| 239 | /// exists but does not have the right type. |
| 240 | Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc); |
| 241 | Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc); |
| 242 | |
| 243 | /// SetInstName - After an instruction is parsed and inserted into its |
| 244 | /// basic block, this installs its name. |
| 245 | bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc, |
| 246 | Instruction *Inst); |
| 247 | |
| 248 | /// GetBB - Get a basic block with the specified name or ID, creating a |
| 249 | /// forward reference record if needed. This can return null if the value |
| 250 | /// is not a BasicBlock. |
| 251 | BasicBlock *GetBB(const std::string &Name, LocTy Loc); |
| 252 | BasicBlock *GetBB(unsigned ID, LocTy Loc); |
| 253 | |
| 254 | /// DefineBB - Define the specified basic block, which is either named or |
| 255 | /// unnamed. If there is an error, this returns null otherwise it returns |
| 256 | /// the block being defined. |
| 257 | BasicBlock *DefineBB(const std::string &Name, LocTy Loc); |
| 258 | }; |
| 259 | |
| 260 | bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, |
| 261 | PerFunctionState *PFS); |
| 262 | |
| 263 | bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS); |
| 264 | bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, |
| 265 | PerFunctionState &PFS) { |
| 266 | Loc = Lex.getLoc(); |
| 267 | return ParseValue(Ty, V, PFS); |
| 268 | } |
| 269 | |
| 270 | bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS); |
| 271 | bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) { |
| 272 | Loc = Lex.getLoc(); |
| 273 | return ParseTypeAndValue(V, PFS); |
| 274 | } |
| 275 | bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, |
| 276 | PerFunctionState &PFS); |
| 277 | bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) { |
| 278 | LocTy Loc; |
| 279 | return ParseTypeAndBasicBlock(BB, Loc, PFS); |
| 280 | } |
| 281 | |
| 282 | struct ParamInfo { |
| 283 | LocTy Loc; |
| 284 | Value *V; |
| 285 | unsigned Attrs; |
| 286 | ParamInfo(LocTy loc, Value *v, unsigned attrs) |
| 287 | : Loc(loc), V(v), Attrs(attrs) {} |
| 288 | }; |
| 289 | bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, |
| 290 | PerFunctionState &PFS); |
| 291 | |
| 292 | // Constant Parsing. |
| 293 | bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); |
| 294 | bool ParseGlobalValue(const Type *Ty, Constant *&V); |
| 295 | bool ParseGlobalTypeAndValue(Constant *&V); |
| 296 | bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); |
| 297 | bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); |
| 298 | |
| 299 | // Function Parsing. |
| 300 | struct ArgInfo { |
| 301 | LocTy Loc; |
| 302 | PATypeHolder Type; |
| 303 | unsigned Attrs; |
| 304 | std::string Name; |
| 305 | ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N) |
| 306 | : Loc(L), Type(Ty), Attrs(Attr), Name(N) {} |
| 307 | }; |
| 308 | bool ParseArgumentList(std::vector<ArgInfo> &ArgList, |
| 309 | bool &isVarArg, bool inType); |
| 310 | bool ParseFunctionHeader(Function *&Fn, bool isDefine); |
| 311 | bool ParseFunctionBody(Function &Fn); |
| 312 | bool ParseBasicBlock(PerFunctionState &PFS); |
| 313 | |
| 314 | // Instruction Parsing. Each instruction parsing routine can return with a |
| 315 | // normal result, an error result, or return having eaten an extra comma. |
| 316 | enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 }; |
| 317 | int ParseInstruction(Instruction *&Inst, BasicBlock *BB, |
| 318 | PerFunctionState &PFS); |
| 319 | bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); |
| 320 | |
| 321 | int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); |
| 322 | bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); |
| 323 | bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); |
| 324 | bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); |
| 325 | bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); |
| 326 | |
| 327 | bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, |
| 328 | unsigned OperandType); |
| 329 | bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc); |
| 330 | bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc); |
| 331 | bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc); |
| 332 | bool ParseSelect(Instruction *&I, PerFunctionState &PFS); |
| 333 | bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS); |
| 334 | bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS); |
| 335 | bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); |
| 336 | bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); |
| 337 | int ParsePHI(Instruction *&I, PerFunctionState &PFS); |
| 338 | bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); |
| 339 | int ParseAlloc(Instruction *&I, PerFunctionState &PFS, |
| 340 | BasicBlock *BB = 0, bool isAlloca = true); |
| 341 | bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB); |
| 342 | int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); |
| 343 | int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); |
| 344 | bool ParseGetResult(Instruction *&I, PerFunctionState &PFS); |
| 345 | int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); |
| 346 | int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); |
| 347 | int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); |
| 348 | |
| 349 | bool ResolveForwardRefBlockAddresses(Function *TheFn, |
| 350 | std::vector<std::pair<ValID, GlobalValue*> > &Refs, |
| 351 | PerFunctionState *PFS); |
| 352 | }; |
| 353 | } // End llvm namespace |
| 354 | |
| 355 | #endif |