Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 1 | /*===-- Lexer.l - Scanner for llvm assembly files ----------------*- C++ -*--=// |
| 2 | // |
// This file implements the flex scanner for LLVM assembly language files.
| 4 | // |
| 5 | //===------------------------------------------------------------------------=*/ |
| 6 | |
| 7 | %option prefix="llvmAsm" |
| 8 | %option yylineno |
| 9 | %option nostdinit |
| 10 | %option never-interactive |
| 11 | %option batch |
| 12 | %option noyywrap |
| 13 | %option nodefault |
| 14 | %option 8bit |
| 15 | %option outfile="Lexer.cpp" |
| 16 | %option ecs |
| 17 | %option noreject |
| 18 | %option noyymore |
| 19 | |
| 20 | %{ |
| 21 | #include "ParserInternals.h" |
| 22 | #include "llvm/BasicBlock.h" |
| 23 | #include "llvm/Method.h" |
| 24 | #include "llvm/Module.h" |
| 25 | #include <list> |
| 26 | #include "llvmAsmParser.h" |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 27 | #include <ctype.h> |
| 28 | #include <stdlib.h> |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 29 | |
// RET_TOK - Store the Instruction opcode 'Enum' into the 'type' member of
// llvmAsmlval, then return the token 'sym' from the lexer.  The body is
// wrapped in do { } while (0) so the macro expands to exactly one statement
// and stays correct when used as the body of an unbraced if/else.
#define RET_TOK(type, Enum, sym) \
  do { llvmAsmlval.type = Instruction::Enum; return sym; } while (0)
| 32 | |
| 33 | |
| 34 | // TODO: All of the static identifiers are figured out by the lexer, |
Chris Lattner | e1fe875 | 2001-09-07 16:32:43 +0000 | [diff] [blame^] | 35 | // these should be hashed to reduce the lexer size |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 36 | |
| 37 | |
| 38 | // atoull - Convert an ascii string of decimal digits into the unsigned long |
| 39 | // long representation... this does not have to do input error checking, |
| 40 | // because we know that the input will be matched by a suitable regex... |
| 41 | // |
| 42 | uint64_t atoull(const char *Buffer) { |
| 43 | uint64_t Result = 0; |
| 44 | for (; *Buffer; Buffer++) { |
| 45 | uint64_t OldRes = Result; |
| 46 | Result *= 10; |
| 47 | Result += *Buffer-'0'; |
| 48 | if (Result < OldRes) { // Uh, oh, overflow detected!!! |
| 49 | ThrowException("constant bigger than 64 bits detected!"); |
| 50 | } |
| 51 | } |
| 52 | return Result; |
| 53 | } |
| 54 | |
| 55 | |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 56 | // UnEscapeLexed - Run through the specified buffer and change \xx codes to the |
| 57 | // appropriate character. If AllowNull is set to false, a \00 value will cause |
| 58 | // an exception to be thrown. |
| 59 | // |
| 60 | // If AllowNull is set to true, the return value of the function points to the |
| 61 | // last character of the string in memory. |
| 62 | // |
| 63 | char *UnEscapeLexed(char *Buffer, bool AllowNull = false) { |
| 64 | char *BOut = Buffer; |
| 65 | for (char *BIn = Buffer; *BIn; ) { |
| 66 | if (BIn[0] == '\\' && isxdigit(BIn[1]) && isxdigit(BIn[2])) { |
| 67 | char Tmp = BIn[3]; BIn[3] = 0; // Terminate string |
| 68 | *BOut = strtol(BIn+1, 0, 16); // Convert to number |
| 69 | if (!AllowNull && !*BOut) |
| 70 | ThrowException("String literal cannot accept \\00 escape!"); |
| 71 | |
| 72 | BIn[3] = Tmp; // Restore character |
| 73 | BIn += 3; // Skip over handled chars |
| 74 | ++BOut; |
| 75 | } else { |
| 76 | *BOut++ = *BIn++; |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | return BOut; |
| 81 | } |
| 82 | |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 83 | #define YY_NEVER_INTERACTIVE 1 |
| 84 | %} |
| 85 | |
| 86 | |
| 87 | |
| 88 | /* Comments start with a ; and go till end of line */ |
| 89 | Comment ;.* |
| 90 | |
Chris Lattner | e181564 | 2001-07-15 06:35:53 +0000 | [diff] [blame] | 91 | /* Variable(Value) identifiers start with a % sign */ |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 92 | VarID %[a-zA-Z$._][a-zA-Z$._0-9]* |
| 93 | |
| 94 | /* Label identifiers end with a colon */ |
| 95 | Label [a-zA-Z$._0-9]+: |
| 96 | |
/* Quoted names must be non-empty and can contain any character except " */
| 98 | StringConstant \"[^\"]+\" |
| 99 | |
| 100 | |
/* E[PN]Integer: match positive and negative literal integer values that
 * are preceded by a '%' character.  These represent unnamed variable slots.
 */
| 104 | EPInteger %[0-9]+ |
| 105 | ENInteger %-[0-9]+ |
| 106 | |
| 107 | |
/* [PN]Integer: match positive and negative literal integer values */
| 109 | PInteger [0-9]+ |
| 110 | NInteger -[0-9]+ |
| 111 | |
Chris Lattner | 3d52b2f | 2001-07-15 00:17:01 +0000 | [diff] [blame] | 112 | /* FPConstant - A Floating point constant. |
| 113 | TODO: Expand lexer to support 10e50 FP constant notation */ |
| 114 | FPConstant [0-9]+[.][0-9]* |
| 115 | |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 116 | %% |
| 117 | |
| 118 | {Comment} { /* Ignore comments for now */ } |
| 119 | |
| 120 | begin { return BEGINTOK; } |
| 121 | end { return END; } |
| 122 | true { return TRUE; } |
| 123 | false { return FALSE; } |
| 124 | declare { return DECLARE; } |
| 125 | implementation { return IMPLEMENTATION; } |
Chris Lattner | 8b81bf5 | 2001-07-25 22:47:46 +0000 | [diff] [blame] | 126 | \.\.\. { return DOTDOTDOT; } |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 127 | string { return STRING; } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 128 | |
Chris Lattner | e1fe875 | 2001-09-07 16:32:43 +0000 | [diff] [blame^] | 129 | void { llvmAsmlval.PrimType = Type::VoidTy ; return VOID; } |
| 130 | bool { llvmAsmlval.PrimType = Type::BoolTy ; return BOOL; } |
| 131 | sbyte { llvmAsmlval.PrimType = Type::SByteTy ; return SBYTE; } |
| 132 | ubyte { llvmAsmlval.PrimType = Type::UByteTy ; return UBYTE; } |
| 133 | short { llvmAsmlval.PrimType = Type::ShortTy ; return SHORT; } |
| 134 | ushort { llvmAsmlval.PrimType = Type::UShortTy; return USHORT; } |
| 135 | int { llvmAsmlval.PrimType = Type::IntTy ; return INT; } |
| 136 | uint { llvmAsmlval.PrimType = Type::UIntTy ; return UINT; } |
| 137 | long { llvmAsmlval.PrimType = Type::LongTy ; return LONG; } |
| 138 | ulong { llvmAsmlval.PrimType = Type::ULongTy ; return ULONG; } |
| 139 | float { llvmAsmlval.PrimType = Type::FloatTy ; return FLOAT; } |
| 140 | double { llvmAsmlval.PrimType = Type::DoubleTy; return DOUBLE; } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 141 | |
Chris Lattner | e1fe875 | 2001-09-07 16:32:43 +0000 | [diff] [blame^] | 142 | type { llvmAsmlval.PrimType = Type::TypeTy ; return TYPE; } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 143 | |
Chris Lattner | e1fe875 | 2001-09-07 16:32:43 +0000 | [diff] [blame^] | 144 | label { llvmAsmlval.PrimType = Type::LabelTy ; return LABEL; } |
| 145 | opaque { llvmAsmlval.TypeVal = |
| 146 | new PATypeHolder<Type>(OpaqueType::get()); |
| 147 | return OPAQUE; |
| 148 | } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 149 | |
Chris Lattner | 027dcc5 | 2001-07-08 21:10:27 +0000 | [diff] [blame] | 150 | |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 151 | not { RET_TOK(UnaryOpVal, Not, NOT); } |
| 152 | |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 153 | add { RET_TOK(BinaryOpVal, Add, ADD); } |
| 154 | sub { RET_TOK(BinaryOpVal, Sub, SUB); } |
| 155 | mul { RET_TOK(BinaryOpVal, Mul, MUL); } |
| 156 | div { RET_TOK(BinaryOpVal, Div, DIV); } |
| 157 | rem { RET_TOK(BinaryOpVal, Rem, REM); } |
| 158 | setne { RET_TOK(BinaryOpVal, SetNE, SETNE); } |
| 159 | seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); } |
| 160 | setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); } |
| 161 | setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); } |
| 162 | setle { RET_TOK(BinaryOpVal, SetLE, SETLE); } |
| 163 | setge { RET_TOK(BinaryOpVal, SetGE, SETGE); } |
| 164 | |
Chris Lattner | 027dcc5 | 2001-07-08 21:10:27 +0000 | [diff] [blame] | 165 | to { return TO; } |
| 166 | phi { RET_TOK(OtherOpVal, PHINode, PHI); } |
| 167 | call { RET_TOK(OtherOpVal, Call, CALL); } |
| 168 | cast { RET_TOK(OtherOpVal, Cast, CAST); } |
| 169 | shl { RET_TOK(OtherOpVal, Shl, SHL); } |
| 170 | shr { RET_TOK(OtherOpVal, Shr, SHR); } |
| 171 | |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 172 | ret { RET_TOK(TermOpVal, Ret, RET); } |
| 173 | br { RET_TOK(TermOpVal, Br, BR); } |
| 174 | switch { RET_TOK(TermOpVal, Switch, SWITCH); } |
| 175 | |
| 176 | |
| 177 | malloc { RET_TOK(MemOpVal, Malloc, MALLOC); } |
| 178 | alloca { RET_TOK(MemOpVal, Alloca, ALLOCA); } |
| 179 | free { RET_TOK(MemOpVal, Free, FREE); } |
| 180 | load { RET_TOK(MemOpVal, Load, LOAD); } |
| 181 | store { RET_TOK(MemOpVal, Store, STORE); } |
Chris Lattner | ab5ac6b | 2001-07-08 23:22:50 +0000 | [diff] [blame] | 182 | getelementptr { RET_TOK(MemOpVal, GetElementPtr, GETELEMENTPTR); } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 183 | |
| 184 | |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 185 | {VarID} { |
| 186 | UnEscapeLexed(yytext+1); |
| 187 | llvmAsmlval.StrVal = strdup(yytext+1); // Skip % |
| 188 | return VAR_ID; |
| 189 | } |
| 190 | {Label} { |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 191 | yytext[strlen(yytext)-1] = 0; // nuke colon |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 192 | UnEscapeLexed(yytext); |
| 193 | llvmAsmlval.StrVal = strdup(yytext); |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 194 | return LABELSTR; |
| 195 | } |
| 196 | |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 197 | {StringConstant} { // Note that we cannot unescape a string constant here! The |
| 198 | // string constant might contain a \00 which would not be |
| 199 | // understood by the string stuff. It is valid to make a |
| 200 | // [sbyte] c"Hello World\00" constant, for example. |
| 201 | // |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 202 | yytext[strlen(yytext)-1] = 0; // nuke end quote |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 203 | llvmAsmlval.StrVal = strdup(yytext+1); // Nuke start quote |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 204 | return STRINGCONSTANT; |
Chris Lattner | 93750fa | 2001-07-28 17:48:55 +0000 | [diff] [blame] | 205 | } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 206 | |
| 207 | |
| 208 | {PInteger} { llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; } |
| 209 | {NInteger} { |
| 210 | uint64_t Val = atoull(yytext+1); |
| 211 | // +1: we have bigger negative range |
| 212 | if (Val > (uint64_t)INT64_MAX+1) |
| 213 | ThrowException("Constant too large for signed 64 bits!"); |
| 214 | llvmAsmlval.SInt64Val = -Val; |
| 215 | return ESINT64VAL; |
| 216 | } |
| 217 | |
| 218 | |
{EPInteger}     {
                  uint64_t Val = atoull(yytext+1);       // Skip %
                  llvmAsmlval.UIntVal = Val;
                  // UIntVal may be narrower than 64 bits; reject values that
                  // did not survive the store instead of silently truncating.
                  if (llvmAsmlval.UIntVal != Val)
                    ThrowException("Constant too large for unnamed value slot!");
                  return UINTVAL;
                }
| 220 | {ENInteger} { |
| 221 | uint64_t Val = atoull(yytext+2); |
| 222 | // +1: we have bigger negative range |
| 223 | if (Val > (uint64_t)INT32_MAX+1) |
| 224 | ThrowException("Constant too large for signed 32 bits!"); |
| 225 | llvmAsmlval.SIntVal = -Val; |
| 226 | return SINTVAL; |
| 227 | } |
| 228 | |
Chris Lattner | 3d52b2f | 2001-07-15 00:17:01 +0000 | [diff] [blame] | 229 | {FPConstant} { llvmAsmlval.FPVal = atof(yytext); return FPVAL; } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 230 | |
[ \t\r\n]       { /* Ignore whitespace, including the \r of CRLF line ends */ }
Chris Lattner | e1fe875 | 2001-09-07 16:32:43 +0000 | [diff] [blame^] | 232 | . { return yytext[0]; } |
Chris Lattner | 0095054 | 2001-06-06 20:29:01 +0000 | [diff] [blame] | 233 | |
| 234 | %% |