/*===-- Lexer.l - Scanner for llvm assembly files ----------------*- C++ -*--=//
//
//  This file implements the flex scanner for LLVM assembly language files.
//
//===------------------------------------------------------------------------=*/
| 6 | |
| 7 | %option prefix="llvmAsm" |
| 8 | %option yylineno |
| 9 | %option nostdinit |
| 10 | %option never-interactive |
| 11 | %option batch |
| 12 | %option noyywrap |
| 13 | %option nodefault |
| 14 | %option 8bit |
| 15 | %option outfile="Lexer.cpp" |
| 16 | %option ecs |
| 17 | %option noreject |
| 18 | %option noyymore |
| 19 | |
| 20 | %{ |
| 21 | #include "ParserInternals.h" |
| 22 | #include "llvm/BasicBlock.h" |
| 23 | #include "llvm/Method.h" |
| 24 | #include "llvm/Module.h" |
| 25 | #include <list> |
| 26 | #include "llvmAsmParser.h" |
| 27 | |
// RET_TOK - Record which instruction opcode was just scanned and hand the
// matching token code back to the caller of the scanner.
//   type - name of the llvmAsmlval member that receives the opcode
//   Enum - enumerator of Instruction that is stored into that member
//   sym  - token code returned from the scanner
// The body is wrapped in do { ... } while (0) so that the expansion is one
// single statement and stays correct even as the body of an unbraced if/else.
#define RET_TOK(type, Enum, sym) \
  do { llvmAsmlval.type = Instruction::Enum; return sym; } while (0)
| 30 | |
| 31 | |
| 32 | // TODO: All of the static identifiers are figured out by the lexer, |
| 33 | // these should be hashed. |
| 34 | |
| 35 | |
| 36 | // atoull - Convert an ascii string of decimal digits into the unsigned long |
| 37 | // long representation... this does not have to do input error checking, |
| 38 | // because we know that the input will be matched by a suitable regex... |
| 39 | // |
| 40 | uint64_t atoull(const char *Buffer) { |
| 41 | uint64_t Result = 0; |
| 42 | for (; *Buffer; Buffer++) { |
| 43 | uint64_t OldRes = Result; |
| 44 | Result *= 10; |
| 45 | Result += *Buffer-'0'; |
| 46 | if (Result < OldRes) { // Uh, oh, overflow detected!!! |
| 47 | ThrowException("constant bigger than 64 bits detected!"); |
| 48 | } |
| 49 | } |
| 50 | return Result; |
| 51 | } |
| 52 | |
| 53 | |
| 54 | #define YY_NEVER_INTERACTIVE 1 |
| 55 | %} |
| 56 | |
| 57 | |
| 58 | |
/* Comments start with a ; and run to the end of the line */
Comment         ;.*

/* Variable (Value) identifiers start with a % sign */
VarID           %[a-zA-Z$._][a-zA-Z$._0-9]*

/* Label identifiers end with a colon */
Label           [a-zA-Z$._0-9]+:

/* Quoted names must be non-empty and can contain any character except ".
 * A backslash is accepted like any other character (the pattern only
 * excludes the double quote).
 */
StringConstant  \"[^\"]+\"


/* E[PN]Integer: match positive and negative literal integer values that
 * are preceded by a '%' character.  These represent unnamed variable slots.
 */
EPInteger       %[0-9]+
ENInteger       %-[0-9]+


/* [PN]Integer: match positive and negative literal integer values */
PInteger        [0-9]+
NInteger        -[0-9]+

/* FPConstant - A floating point constant: an optional sign, one or more
 * digits, a mandatory decimal point, optional fraction digits and an optional
 * exponent, e.g. 1.0  -2.5  3.e10  1.5E-3
 */
FPConstant      [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
| 86 | |
Chris Lattner | 2f7c963 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 87 | %% |
| 88 | |
{Comment}       { /* Comments produce no token: just drop them */ }

  /* Structural keywords and boolean literals */
begin           { return BEGINTOK; }
end             { return END; }
true            { return TRUE; }
false           { return FALSE; }
declare         { return DECLARE; }
implementation  { return IMPLEMENTATION; }
\.\.\.          { return DOTDOTDOT; }

  /* Type names: store the matching Type:: constant in TypeVal */
void            { llvmAsmlval.TypeVal = Type::VoidTy;   return VOID;   }
bool            { llvmAsmlval.TypeVal = Type::BoolTy;   return BOOL;   }
sbyte           { llvmAsmlval.TypeVal = Type::SByteTy;  return SBYTE;  }
ubyte           { llvmAsmlval.TypeVal = Type::UByteTy;  return UBYTE;  }
short           { llvmAsmlval.TypeVal = Type::ShortTy;  return SHORT;  }
ushort          { llvmAsmlval.TypeVal = Type::UShortTy; return USHORT; }
int             { llvmAsmlval.TypeVal = Type::IntTy;    return INT;    }
uint            { llvmAsmlval.TypeVal = Type::UIntTy;   return UINT;   }
long            { llvmAsmlval.TypeVal = Type::LongTy;   return LONG;   }
ulong           { llvmAsmlval.TypeVal = Type::ULongTy;  return ULONG;  }
float           { llvmAsmlval.TypeVal = Type::FloatTy;  return FLOAT;  }
double          { llvmAsmlval.TypeVal = Type::DoubleTy; return DOUBLE; }
type            { llvmAsmlval.TypeVal = Type::TypeTy;   return TYPE;   }
label           { llvmAsmlval.TypeVal = Type::LabelTy;  return LABEL;  }

  /* Unary operators: RET_TOK stores the opcode and returns the token */
not             { RET_TOK(UnaryOpVal, Not, NOT); }

  /* Binary operators */
add             { RET_TOK(BinaryOpVal, Add, ADD); }
sub             { RET_TOK(BinaryOpVal, Sub, SUB); }
mul             { RET_TOK(BinaryOpVal, Mul, MUL); }
div             { RET_TOK(BinaryOpVal, Div, DIV); }
rem             { RET_TOK(BinaryOpVal, Rem, REM); }
setne           { RET_TOK(BinaryOpVal, SetNE, SETNE); }
seteq           { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
setlt           { RET_TOK(BinaryOpVal, SetLT, SETLT); }
setgt           { RET_TOK(BinaryOpVal, SetGT, SETGT); }
setle           { RET_TOK(BinaryOpVal, SetLE, SETLE); }
setge           { RET_TOK(BinaryOpVal, SetGE, SETGE); }

  /* The 'to' keyword and the remaining (OtherOpVal) operators */
to              { return TO; }
phi             { RET_TOK(OtherOpVal, PHINode, PHI); }
call            { RET_TOK(OtherOpVal, Call, CALL); }
cast            { RET_TOK(OtherOpVal, Cast, CAST); }
shl             { RET_TOK(OtherOpVal, Shl, SHL); }
shr             { RET_TOK(OtherOpVal, Shr, SHR); }

  /* Terminator instructions */
ret             { RET_TOK(TermOpVal, Ret, RET); }
br              { RET_TOK(TermOpVal, Br, BR); }
switch          { RET_TOK(TermOpVal, Switch, SWITCH); }

  /* Memory instructions */
malloc          { RET_TOK(MemOpVal, Malloc, MALLOC); }
alloca          { RET_TOK(MemOpVal, Alloca, ALLOCA); }
free            { RET_TOK(MemOpVal, Free, FREE); }
load            { RET_TOK(MemOpVal, Load, LOAD); }
store           { RET_TOK(MemOpVal, Store, STORE); }
getelementptr   { RET_TOK(MemOpVal, GetElementPtr, GETELEMENTPTR); }
| 150 | |
| 151 | {VarID} { llvmAsmlval.StrVal = strdup(yytext+1); return VAR_ID; } |
| 152 | {Label} { |
| 153 | yytext[strlen(yytext)-1] = 0; // nuke colon |
| 154 | llvmAsmlval.StrVal = strdup(yytext); |
| 155 | return LABELSTR; |
| 156 | } |
| 157 | |
| 158 | {StringConstant} { |
| 159 | yytext[strlen(yytext)-1] = 0; // nuke end quote |
| 160 | llvmAsmlval.StrVal = strdup(yytext+1); // Nuke start quote |
| 161 | return STRINGCONSTANT; |
| 162 | } |
| 163 | |
| 164 | |
| 165 | {PInteger} { llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; } |
| 166 | {NInteger} { |
| 167 | uint64_t Val = atoull(yytext+1); |
| 168 | // +1: we have bigger negative range |
| 169 | if (Val > (uint64_t)INT64_MAX+1) |
| 170 | ThrowException("Constant too large for signed 64 bits!"); |
| 171 | llvmAsmlval.SInt64Val = -Val; |
| 172 | return ESINT64VAL; |
| 173 | } |
| 174 | |
| 175 | |
| 176 | {EPInteger} { llvmAsmlval.UIntVal = atoull(yytext+1); return UINTVAL; } |
| 177 | {ENInteger} { |
| 178 | uint64_t Val = atoull(yytext+2); |
| 179 | // +1: we have bigger negative range |
| 180 | if (Val > (uint64_t)INT32_MAX+1) |
| 181 | ThrowException("Constant too large for signed 32 bits!"); |
| 182 | llvmAsmlval.SIntVal = -Val; |
| 183 | return SINTVAL; |
| 184 | } |
| 185 | |
Chris Lattner | 212f70d | 2001-07-15 00:17:01 +0000 | [diff] [blame] | 186 | {FPConstant} { llvmAsmlval.FPVal = atof(yytext); return FPVAL; } |
Chris Lattner | 2f7c963 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 187 | |
| 188 | [ \t\n] { /* Ignore whitespace */ } |
| 189 | . { /*printf("'%s'", yytext);*/ return yytext[0]; } |
| 190 | |
| 191 | %% |