blob: b47ae910c7ca450abf5fd7434a028a72cce84b07 [file] [log] [blame]
Chris Lattner32eecb02006-02-14 05:14:46 +00001/*===-- Lexer.l - Scanner for llvm assembly files --------------*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by the LLVM research group and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file implements the flex scanner for LLVM assembly language files.
11//
12//===----------------------------------------------------------------------===*/
13
14%option prefix="llvmAsm"
15%option yylineno
16%option nostdinit
17%option never-interactive
18%option batch
19%option noyywrap
20%option nodefault
21%option 8bit
22%option outfile="Lexer.cpp"
23%option ecs
24%option noreject
25%option noyymore
26
27%{
28#include "ParserInternals.h"
29#include "llvm/Module.h"
30#include <list>
31#include "llvmAsmParser.h"
32#include <cctype>
33#include <cstdlib>
34
35void set_scan_file(FILE * F){
36 yy_switch_to_buffer(yy_create_buffer( F, YY_BUF_SIZE ) );
37}
38void set_scan_string (const char * str) {
39 yy_scan_string (str);
40}
41
// The four macros below fill in llvmAsmlval and then return the token code
// `sym` from the scanner.  Each body is wrapped in do { ... } while (0) so that
// it expands to exactly one statement and therefore stays correct when used
// under an unbraced if/else.

// Construct a token value for a non-obsolete token: store the opcode
// Instruction::Enum in the llvmAsmlval member `type` and clear its obsolete
// flag.
#define RET_TOK(type, Enum, sym) \
  do { \
    llvmAsmlval.type.opcode = Instruction::Enum; \
    llvmAsmlval.type.obsolete = false; \
    return sym; \
  } while (0)

// Construct a token value for an obsolete token: same as RET_TOK, but the
// obsolete flag is set.
#define RET_TOK_OBSOLETE(type, Enum, sym) \
  do { \
    llvmAsmlval.type.opcode = Instruction::Enum; \
    llvmAsmlval.type.obsolete = true; \
    return sym; \
  } while (0)

// Construct a token value for a non-obsolete type: allocate a PATypeHolder
// for CType and mark the type as signless.
#define RET_TY(CType, sym) \
  do { \
    llvmAsmlval.TypeVal.type = new PATypeHolder(CType); \
    llvmAsmlval.TypeVal.signedness = isSignless; \
    return sym; \
  } while (0)

// Construct a token value for an obsolete type: allocate a PATypeHolder for
// CType and record the explicit signedness `sign`.
#define RET_TY_OBSOLETE(CType, sign, sym) \
  do { \
    llvmAsmlval.TypeVal.type = new PATypeHolder(CType); \
    llvmAsmlval.TypeVal.signedness = sign; \
    return sym; \
  } while (0)
Chris Lattner32eecb02006-02-14 05:14:46 +000065
66namespace llvm {
67
68// TODO: All of the static identifiers are figured out by the lexer,
69// these should be hashed to reduce the lexer size
70
71
72// atoull - Convert an ascii string of decimal digits into the unsigned long
73// long representation... this does not have to do input error checking,
74// because we know that the input will be matched by a suitable regex...
75//
76static uint64_t atoull(const char *Buffer) {
77 uint64_t Result = 0;
78 for (; *Buffer; Buffer++) {
79 uint64_t OldRes = Result;
80 Result *= 10;
81 Result += *Buffer-'0';
82 if (Result < OldRes) // Uh, oh, overflow detected!!!
Reid Spencer61c83e02006-08-18 08:43:06 +000083 GenerateError("constant bigger than 64 bits detected!");
Chris Lattner32eecb02006-02-14 05:14:46 +000084 }
85 return Result;
86}
87
88static uint64_t HexIntToVal(const char *Buffer) {
89 uint64_t Result = 0;
90 for (; *Buffer; ++Buffer) {
91 uint64_t OldRes = Result;
92 Result *= 16;
93 char C = *Buffer;
94 if (C >= '0' && C <= '9')
95 Result += C-'0';
96 else if (C >= 'A' && C <= 'F')
97 Result += C-'A'+10;
98 else if (C >= 'a' && C <= 'f')
99 Result += C-'a'+10;
100
101 if (Result < OldRes) // Uh, oh, overflow detected!!!
Reid Spencer61c83e02006-08-18 08:43:06 +0000102 GenerateError("constant bigger than 64 bits detected!");
Chris Lattner32eecb02006-02-14 05:14:46 +0000103 }
104 return Result;
105}
106
107
108// HexToFP - Convert the ascii string in hexidecimal format to the floating
109// point representation of it.
110//
111static double HexToFP(const char *Buffer) {
112 // Behave nicely in the face of C TBAA rules... see:
113 // http://www.nullstone.com/htmls/category/aliastyp.htm
114 union {
115 uint64_t UI;
116 double FP;
117 } UIntToFP;
118 UIntToFP.UI = HexIntToVal(Buffer);
119
120 assert(sizeof(double) == sizeof(uint64_t) &&
121 "Data sizes incompatible on this target!");
122 return UIntToFP.FP; // Cast Hex constant to double
123}
124
125
126// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
127// appropriate character. If AllowNull is set to false, a \00 value will cause
128// an exception to be thrown.
129//
130// If AllowNull is set to true, the return value of the function points to the
131// last character of the string in memory.
132//
133char *UnEscapeLexed(char *Buffer, bool AllowNull) {
134 char *BOut = Buffer;
135 for (char *BIn = Buffer; *BIn; ) {
136 if (BIn[0] == '\\' && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
137 char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
138 *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
139 if (!AllowNull && !*BOut)
Reid Spencer61c83e02006-08-18 08:43:06 +0000140 GenerateError("String literal cannot accept \\00 escape!");
Chris Lattner32eecb02006-02-14 05:14:46 +0000141
142 BIn[3] = Tmp; // Restore character
143 BIn += 3; // Skip over handled chars
144 ++BOut;
145 } else {
146 *BOut++ = *BIn++;
147 }
148 }
149
150 return BOut;
151}
152
153} // End llvm namespace
154
155using namespace llvm;
156
157#define YY_NEVER_INTERACTIVE 1
158%}
159
160
161
162/* Comments start with a ; and go till end of line */
163Comment ;.*
164
165/* Variable(Value) identifiers start with a % sign */
166VarID %[-a-zA-Z$._][-a-zA-Z$._0-9]*
167
168/* Label identifiers end with a colon */
169Label [-a-zA-Z$._0-9]+:
170QuoteLabel \"[^\"]+\":
171
/* Quoted strings can contain any character except " (a backslash is allowed,
 * for example as part of a \00 escape)
 */
173StringConstant \"[^\"]*\"
174
175
/* E[PN]Integer: match positive and negative literal integer values that
 * are preceded by a '%' character.  These represent unnamed variable slots.
 */
179EPInteger %[0-9]+
180ENInteger %-[0-9]+
181
182
/* [PN]Integer: match positive and negative literal integer values */
184PInteger [0-9]+
185NInteger -[0-9]+
186
187/* FPConstant - A Floating point constant.
188 */
189FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
190
191/* HexFPConstant - Floating point constant represented in IEEE format as a
192 * hexadecimal number for when exponential notation is not precise enough.
193 */
194HexFPConstant 0x[0-9A-Fa-f]+
195
196/* HexIntConstant - Hexadecimal constant generated by the CFE to avoid forcing
197 * it to deal with 64 bit numbers.
198 */
199HexIntConstant [us]0x[0-9A-Fa-f]+
200%%
201
202{Comment} { /* Ignore comments for now */ }
203
204begin { return BEGINTOK; }
205end { return ENDTOK; }
206true { return TRUETOK; }
207false { return FALSETOK; }
208declare { return DECLARE; }
209global { return GLOBAL; }
210constant { return CONSTANT; }
211internal { return INTERNAL; }
212linkonce { return LINKONCE; }
213weak { return WEAK; }
214appending { return APPENDING; }
Anton Korobeynikovb74ed072006-09-14 18:23:27 +0000215dllimport { return DLLIMPORT; }
216dllexport { return DLLEXPORT; }
217extern_weak { return EXTERN_WEAK; }
Chris Lattner32eecb02006-02-14 05:14:46 +0000218uninitialized { return EXTERNAL; } /* Deprecated, turn into external */
219external { return EXTERNAL; }
220implementation { return IMPLEMENTATION; }
221zeroinitializer { return ZEROINITIALIZER; }
222\.\.\. { return DOTDOTDOT; }
223undef { return UNDEF; }
224null { return NULL_TOK; }
225to { return TO; }
226except { RET_TOK(TermOpVal, Unwind, UNWIND); }
227not { return NOT; } /* Deprecated, turned into XOR */
228tail { return TAIL; }
229target { return TARGET; }
230triple { return TRIPLE; }
231deplibs { return DEPLIBS; }
232endian { return ENDIAN; }
233pointersize { return POINTERSIZE; }
Chris Lattner1ae022f2006-10-22 06:08:13 +0000234datalayout { return DATALAYOUT; }
Chris Lattner32eecb02006-02-14 05:14:46 +0000235little { return LITTLE; }
236big { return BIG; }
237volatile { return VOLATILE; }
238align { return ALIGN; }
239section { return SECTION; }
240module { return MODULE; }
241asm { return ASM_TOK; }
242sideeffect { return SIDEEFFECT; }
243
244cc { return CC_TOK; }
245ccc { return CCC_TOK; }
Chris Lattner75466192006-05-19 21:28:53 +0000246csretcc { return CSRETCC_TOK; }
Chris Lattner32eecb02006-02-14 05:14:46 +0000247fastcc { return FASTCC_TOK; }
248coldcc { return COLDCC_TOK; }
Anton Korobeynikovbcb97702006-09-17 20:25:45 +0000249x86_stdcallcc { return X86_STDCALLCC_TOK; }
250x86_fastcallcc { return X86_FASTCALLCC_TOK; }
Chris Lattner32eecb02006-02-14 05:14:46 +0000251
Reid Spencer3da59db2006-11-27 01:05:10 +0000252void { RET_TY(Type::VoidTy, VOID); }
253bool { RET_TY(Type::BoolTy, BOOL); }
254sbyte { RET_TY_OBSOLETE(Type::SByteTy, isSigned, SBYTE); }
255ubyte { RET_TY_OBSOLETE(Type::UByteTy, isUnsigned, UBYTE); }
256short { RET_TY_OBSOLETE(Type::ShortTy, isSigned, SHORT); }
257ushort { RET_TY_OBSOLETE(Type::UShortTy,isUnsigned, USHORT); }
258int { RET_TY_OBSOLETE(Type::IntTy, isSigned, INT); }
259uint { RET_TY_OBSOLETE(Type::UIntTy, isUnsigned, UINT); }
260long { RET_TY_OBSOLETE(Type::LongTy, isSigned, LONG); }
261ulong { RET_TY_OBSOLETE(Type::ULongTy, isUnsigned, ULONG); }
262float { RET_TY(Type::FloatTy, FLOAT); }
263double { RET_TY(Type::DoubleTy, DOUBLE); }
264label { RET_TY(Type::LabelTy, LABEL); }
Chris Lattner32eecb02006-02-14 05:14:46 +0000265type { return TYPE; }
266opaque { return OPAQUE; }
267
268add { RET_TOK(BinaryOpVal, Add, ADD); }
269sub { RET_TOK(BinaryOpVal, Sub, SUB); }
270mul { RET_TOK(BinaryOpVal, Mul, MUL); }
Reid Spencer3ed469c2006-11-02 20:25:50 +0000271div { RET_TOK_OBSOLETE(BinaryOpVal, UDiv, UDIV); }
272udiv { RET_TOK(BinaryOpVal, UDiv, UDIV); }
273sdiv { RET_TOK(BinaryOpVal, SDiv, SDIV); }
274fdiv { RET_TOK(BinaryOpVal, FDiv, FDIV); }
275rem { RET_TOK_OBSOLETE(BinaryOpVal, URem, UREM); }
276urem { RET_TOK(BinaryOpVal, URem, UREM); }
277srem { RET_TOK(BinaryOpVal, SRem, SREM); }
278frem { RET_TOK(BinaryOpVal, FRem, FREM); }
Chris Lattner32eecb02006-02-14 05:14:46 +0000279and { RET_TOK(BinaryOpVal, And, AND); }
280or { RET_TOK(BinaryOpVal, Or , OR ); }
281xor { RET_TOK(BinaryOpVal, Xor, XOR); }
282setne { RET_TOK(BinaryOpVal, SetNE, SETNE); }
283seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
284setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); }
285setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); }
286setle { RET_TOK(BinaryOpVal, SetLE, SETLE); }
287setge { RET_TOK(BinaryOpVal, SetGE, SETGE); }
288
289phi { RET_TOK(OtherOpVal, PHI, PHI_TOK); }
290call { RET_TOK(OtherOpVal, Call, CALL); }
Reid Spencer3da59db2006-11-27 01:05:10 +0000291cast { RET_TOK_OBSOLETE(CastOpVal, Trunc, TRUNC); }
292trunc { RET_TOK(CastOpVal, Trunc, TRUNC); }
293zext { RET_TOK(CastOpVal, ZExt, ZEXT); }
294sext { RET_TOK(CastOpVal, SExt, SEXT); }
295fptrunc { RET_TOK(CastOpVal, FPTrunc, FPTRUNC); }
296fpext { RET_TOK(CastOpVal, FPExt, FPEXT); }
297uitofp { RET_TOK(CastOpVal, UIToFP, UITOFP); }
298sitofp { RET_TOK(CastOpVal, SIToFP, SITOFP); }
299fptoui { RET_TOK(CastOpVal, FPToUI, FPTOUI); }
300fptosi { RET_TOK(CastOpVal, FPToSI, FPTOSI); }
301inttoptr { RET_TOK(CastOpVal, IntToPtr, INTTOPTR); }
302ptrtoint { RET_TOK(CastOpVal, PtrToInt, PTRTOINT); }
303bitcast { RET_TOK(CastOpVal, BitCast, BITCAST); }
Chris Lattner32eecb02006-02-14 05:14:46 +0000304select { RET_TOK(OtherOpVal, Select, SELECT); }
305shl { RET_TOK(OtherOpVal, Shl, SHL); }
Reid Spencer3da59db2006-11-27 01:05:10 +0000306shr { RET_TOK_OBSOLETE(OtherOpVal, LShr, LSHR); }
307lshr { RET_TOK(OtherOpVal, LShr, LSHR); }
308ashr { RET_TOK(OtherOpVal, AShr, ASHR); }
Chris Lattner32eecb02006-02-14 05:14:46 +0000309vanext { return VANEXT_old; }
310vaarg { return VAARG_old; }
311va_arg { RET_TOK(OtherOpVal, VAArg , VAARG); }
312ret { RET_TOK(TermOpVal, Ret, RET); }
313br { RET_TOK(TermOpVal, Br, BR); }
314switch { RET_TOK(TermOpVal, Switch, SWITCH); }
315invoke { RET_TOK(TermOpVal, Invoke, INVOKE); }
316unwind { RET_TOK(TermOpVal, Unwind, UNWIND); }
317unreachable { RET_TOK(TermOpVal, Unreachable, UNREACHABLE); }
318
319malloc { RET_TOK(MemOpVal, Malloc, MALLOC); }
320alloca { RET_TOK(MemOpVal, Alloca, ALLOCA); }
321free { RET_TOK(MemOpVal, Free, FREE); }
322load { RET_TOK(MemOpVal, Load, LOAD); }
323store { RET_TOK(MemOpVal, Store, STORE); }
324getelementptr { RET_TOK(MemOpVal, GetElementPtr, GETELEMENTPTR); }
325
326extractelement { RET_TOK(OtherOpVal, ExtractElement, EXTRACTELEMENT); }
327insertelement { RET_TOK(OtherOpVal, InsertElement, INSERTELEMENT); }
Chris Lattnerd5efe842006-04-08 01:18:56 +0000328shufflevector { RET_TOK(OtherOpVal, ShuffleVector, SHUFFLEVECTOR); }
Chris Lattner32eecb02006-02-14 05:14:46 +0000329
330
{VarID}         {
                  // The name starts after the leading percent sign.  Decode
                  // any escapes in place, then give the parser a heap copy.
                  char *Name = yytext+1;
                  UnEscapeLexed(Name);
                  llvmAsmlval.StrVal = strdup(Name);
                  return VAR_ID;
                }
{Label}         {
                  // Cut the trailing colon off before decoding and copying.
                  char *Colon = yytext + (strlen(yytext)-1);
                  *Colon = 0;
                  UnEscapeLexed(yytext);
                  llvmAsmlval.StrVal = strdup(yytext);
                  return LABELSTR;
                }
{QuoteLabel}    {
                  // Terminate at the closing quote, which also removes the
                  // colon after it, and skip the opening quote.
                  yytext[strlen(yytext)-2] = 0;
                  char *Name = yytext+1;
                  UnEscapeLexed(Name);
                  llvmAsmlval.StrVal = strdup(Name);
                  return LABELSTR;
                }
348
{StringConstant} {
                   // Escapes are deliberately NOT decoded here.  A string
                   // constant may legitimately contain a \00 escape (an
                   // sbyte array initializer, for example), and a decoded
                   // zero byte would not survive as a C string.
                   //
                   char *Body = yytext+1;            // Skip the opening quote
                   yytext[strlen(yytext)-1] = 0;     // Drop the closing quote
                   llvmAsmlval.StrVal = strdup(Body);
                   return STRINGCONSTANT;
                 }
358
359
{PInteger}      {
                  // Unsigned decimal literal.
                  llvmAsmlval.UInt64Val = atoull(yytext);
                  return EUINT64VAL;
                }
{NInteger}      {
                  // Parse the magnitude that follows the minus sign.
                  uint64_t Magnitude = atoull(yytext+1);
                  // The negative range reaches one further than the positive.
                  const uint64_t Limit = (uint64_t)INT64_MAX+1;
                  if (Magnitude > Limit)
                    GenerateError("Constant too large for signed 64 bits!");
                  llvmAsmlval.SInt64Val = -Magnitude;
                  return ESINT64VAL;
                }
{HexIntConstant} {
                   // Skip the three-character prefix: the sign letter and 0x.
                   llvmAsmlval.UInt64Val = HexIntToVal(yytext+3);
                   // The leading letter selects the token kind.
                   if (yytext[0] == 's')
                     return ESINT64VAL;
                   return EUINT64VAL;
                 }
373
{EPInteger}     {
                  // Slot number after the percent sign; it must survive the
                  // narrowing to unsigned without losing bits.
                  uint64_t Slot = atoull(yytext+1);
                  unsigned Narrowed = unsigned(Slot);
                  if (Narrowed != Slot)
                    GenerateError("Invalid value number (too large)!");
                  llvmAsmlval.UIntVal = Narrowed;
                  return UINTVAL;
                }
{ENInteger}     {
                  // Parse the magnitude after the percent and minus signs.
                  uint64_t Magnitude = atoull(yytext+2);
                  // The negative range reaches one further than the positive.
                  const uint64_t Limit = (uint64_t)INT32_MAX+1;
                  if (Magnitude > Limit)
                    GenerateError("Constant too large for signed 32 bits!");
                  llvmAsmlval.SIntVal = (int)-Magnitude;
                  return SINTVAL;
                }
389
{FPConstant}    {
                  // Decimal floating point literal.
                  llvmAsmlval.FPVal = atof(yytext);
                  return FPVAL;
                }
{HexFPConstant} {
                  // The whole token, 0x prefix included, is handed to HexToFP.
                  llvmAsmlval.FPVal = HexToFP(yytext);
                  return FPVAL;
                }
392
<<EOF>>         {
                  /* The input is exhausted: release the flex buffer that was
                   * used to read it before reporting end of file.
                   */
                  yy_delete_buffer(YY_CURRENT_BUFFER);
                  return EOF;
                }
400
401[ \r\t\n] { /* Ignore whitespace */ }
402. { return yytext[0]; }
403
404%%