Blame - lib/AsmParser/Lexer.l.cvs - fp2-dev/platform/external/llvm

blob: 4df84f685e64e34bbc2b025a239266d40c6edaa6 [file] [log] [blame]

Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	1	/*===-- Lexer.l - Scanner for llvm assembly files --------------*- C++ -*--===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by the LLVM research group and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the flex scanner for LLVM assembly language files.
				11	//
				12	//===----------------------------------------------------------------------===*/
				13
				14	%option prefix="llvmAsm"
				15	%option yylineno
				16	%option nostdinit
				17	%option never-interactive
				18	%option batch
				19	%option noyywrap
				20	%option nodefault
				21	%option 8bit
				22	%option outfile="Lexer.cpp"
				23	%option ecs
				24	%option noreject
				25	%option noyymore
				26
				27	%{
				28	#include "ParserInternals.h"
				29	#include "llvm/Module.h"
				30	#include <list>
				31	#include "llvmAsmParser.h"
				32	#include <cctype>
				33	#include <cstdlib>
				34
				35	void set_scan_file(FILE * F){
				36	yy_switch_to_buffer(yy_create_buffer( F, YY_BUF_SIZE ) );
				37	}
				38	void set_scan_string (const char * str) {
				39	yy_scan_string (str);
				40	}
				41
// RET_TOK - Return token `sym` for a current (non-obsolete) keyword.  Before
// returning, Instruction::Enum is stored in the opcode field of the `type`
// member of llvmAsmlval and that member's obsolete flag is cleared.
//
// The body is wrapped in do { } while (0) so that the expansion is a single
// statement; without it, `if (c) RET_TOK(...);` would guard only the first
// assignment and always execute the return.
#define RET_TOK(type, Enum, sym) \
  do { \
    llvmAsmlval.type.opcode = Instruction::Enum; \
    llvmAsmlval.type.obsolete = false; \
    return sym; \
  } while (0)

// RET_TOK_OBSOLETE - Same as RET_TOK, but sets the obsolete flag so that later
// stages can tell the keyword was spelled in its obsolete form.
#define RET_TOK_OBSOLETE(type, Enum, sym) \
  do { \
    llvmAsmlval.type.opcode = Instruction::Enum; \
    llvmAsmlval.type.obsolete = true; \
    return sym; \
  } while (0)

				53
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	54
				55	namespace llvm {
				56
				57	// TODO: All of the static identifiers are figured out by the lexer,
				58	// these should be hashed to reduce the lexer size
				59
				60
				61	// atoull - Convert an ascii string of decimal digits into the unsigned long
				62	// long representation... this does not have to do input error checking,
				63	// because we know that the input will be matched by a suitable regex...
				64	//
				65	static uint64_t atoull(const char *Buffer) {
				66	uint64_t Result = 0;
				67	for (; *Buffer; Buffer++) {
				68	uint64_t OldRes = Result;
				69	Result *= 10;
				70	Result += *Buffer-'0';
				71	if (Result < OldRes) // Uh, oh, overflow detected!!!
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	72	GenerateError("constant bigger than 64 bits detected!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	73	}
				74	return Result;
				75	}
				76
				77	static uint64_t HexIntToVal(const char *Buffer) {
				78	uint64_t Result = 0;
				79	for (; *Buffer; ++Buffer) {
				80	uint64_t OldRes = Result;
				81	Result *= 16;
				82	char C = *Buffer;
				83	if (C >= '0' && C <= '9')
				84	Result += C-'0';
				85	else if (C >= 'A' && C <= 'F')
				86	Result += C-'A'+10;
				87	else if (C >= 'a' && C <= 'f')
				88	Result += C-'a'+10;
				89
				90	if (Result < OldRes) // Uh, oh, overflow detected!!!
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	91	GenerateError("constant bigger than 64 bits detected!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	92	}
				93	return Result;
				94	}
				95
				96
				97	// HexToFP - Convert the ascii string in hexidecimal format to the floating
				98	// point representation of it.
				99	//
				100	static double HexToFP(const char *Buffer) {
				101	// Behave nicely in the face of C TBAA rules... see:
				102	// http://www.nullstone.com/htmls/category/aliastyp.htm
				103	union {
				104	uint64_t UI;
				105	double FP;
				106	} UIntToFP;
				107	UIntToFP.UI = HexIntToVal(Buffer);
				108
				109	assert(sizeof(double) == sizeof(uint64_t) &&
				110	"Data sizes incompatible on this target!");
				111	return UIntToFP.FP; // Cast Hex constant to double
				112	}
				113
				114
				115	// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
				116	// appropriate character. If AllowNull is set to false, a \00 value will cause
				117	// an exception to be thrown.
				118	//
				119	// If AllowNull is set to true, the return value of the function points to the
				120	// last character of the string in memory.
				121	//
				122	char UnEscapeLexed(char Buffer, bool AllowNull) {
				123	char *BOut = Buffer;
				124	for (char BIn = Buffer; BIn; ) {
				125	if (BIn[0] == '\\' && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
				126	char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
				127	*BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
				128	if (!AllowNull && !*BOut)
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	129	GenerateError("String literal cannot accept \\00 escape!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	130
				131	BIn[3] = Tmp; // Restore character
				132	BIn += 3; // Skip over handled chars
				133	++BOut;
				134	} else {
				135	BOut++ = BIn++;
				136	}
				137	}
				138
				139	return BOut;
				140	}
				141
				142	} // End llvm namespace
				143
				144	using namespace llvm;
				145
				146	#define YY_NEVER_INTERACTIVE 1
				147	%}
				148
				149
				150
				151	/* Comments start with a ; and go till end of line */
				152	Comment ;.*
				153
				154	/* Variable(Value) identifiers start with a % sign */
				155	VarID %[-a-zA-Z$._][-a-zA-Z$._0-9]*
				156
				157	/* Label identifiers end with a colon */
				158	Label [-a-zA-Z$._0-9]+:
				159	QuoteLabel \"[^\"]+\":
				160
				161	/* String constants can contain any character except "; backslash escapes are kept as written and are not unescaped by the lexer */
				162	StringConstant \"[^\"]*\"
				163
				164
				165	/* E[PN]Integer: match positive and negative literal integer values that
				166	* are preceded by a '%' character. These represent unnamed variable slots.
				167	*/
				168	EPInteger %[0-9]+
				169	ENInteger %-[0-9]+
				170
				171
				172	/* [PN]Integer: match positive and negative literal integer values */
				173	PInteger [0-9]+
				174	NInteger -[0-9]+
				175
				176	/* FPConstant - A Floating point constant.
				177	*/
				178	FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
				179
				180	/* HexFPConstant - Floating point constant represented in IEEE format as a
				181	* hexadecimal number for when exponential notation is not precise enough.
				182	*/
				183	HexFPConstant 0x[0-9A-Fa-f]+
				184
				185	/* HexIntConstant - Hexadecimal constant generated by the CFE to avoid forcing
				186	* it to deal with 64 bit numbers.
				187	*/
				188	HexIntConstant [us]0x[0-9A-Fa-f]+
				189	%%
				190
				191	{Comment} { /* Ignore comments for now */ }
				192
				193	begin { return BEGINTOK; }
				194	end { return ENDTOK; }
				195	true { return TRUETOK; }
				196	false { return FALSETOK; }
				197	declare { return DECLARE; }
				198	global { return GLOBAL; }
				199	constant { return CONSTANT; }
				200	internal { return INTERNAL; }
				201	linkonce { return LINKONCE; }
				202	weak { return WEAK; }
				203	appending { return APPENDING; }
Anton Korobeynikov	b74ed07	2006-09-14 18:23:27 +0000	[diff] [blame]	204	dllimport { return DLLIMPORT; }
				205	dllexport { return DLLEXPORT; }
				206	extern_weak { return EXTERN_WEAK; }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	207	uninitialized { return EXTERNAL; } /* Deprecated, turn into external */
				208	external { return EXTERNAL; }
				209	implementation { return IMPLEMENTATION; }
				210	zeroinitializer { return ZEROINITIALIZER; }
				211	\.\.\. { return DOTDOTDOT; }
				212	undef { return UNDEF; }
				213	null { return NULL_TOK; }
				214	to { return TO; }
				215	except { RET_TOK(TermOpVal, Unwind, UNWIND); }
				216	not { return NOT; } /* Deprecated, turned into XOR */
				217	tail { return TAIL; }
				218	target { return TARGET; }
				219	triple { return TRIPLE; }
				220	deplibs { return DEPLIBS; }
				221	endian { return ENDIAN; }
				222	pointersize { return POINTERSIZE; }
Chris Lattner	1ae022f	2006-10-22 06:08:13 +0000	[diff] [blame]	223	datalayout { return DATALAYOUT; }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	224	little { return LITTLE; }
				225	big { return BIG; }
				226	volatile { return VOLATILE; }
				227	align { return ALIGN; }
				228	section { return SECTION; }
				229	module { return MODULE; }
				230	asm { return ASM_TOK; }
				231	sideeffect { return SIDEEFFECT; }
				232
				233	cc { return CC_TOK; }
				234	ccc { return CCC_TOK; }
Chris Lattner	7546619	2006-05-19 21:28:53 +0000	[diff] [blame]	235	csretcc { return CSRETCC_TOK; }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	236	fastcc { return FASTCC_TOK; }
				237	coldcc { return COLDCC_TOK; }
Anton Korobeynikov	bcb9770	2006-09-17 20:25:45 +0000	[diff] [blame]	238	x86_stdcallcc { return X86_STDCALLCC_TOK; }
				239	x86_fastcallcc { return X86_FASTCALLCC_TOK; }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	240
				241	void { llvmAsmlval.PrimType = Type::VoidTy ; return VOID; }
				242	bool { llvmAsmlval.PrimType = Type::BoolTy ; return BOOL; }
				243	sbyte { llvmAsmlval.PrimType = Type::SByteTy ; return SBYTE; }
				244	ubyte { llvmAsmlval.PrimType = Type::UByteTy ; return UBYTE; }
				245	short { llvmAsmlval.PrimType = Type::ShortTy ; return SHORT; }
				246	ushort { llvmAsmlval.PrimType = Type::UShortTy; return USHORT; }
				247	int { llvmAsmlval.PrimType = Type::IntTy ; return INT; }
				248	uint { llvmAsmlval.PrimType = Type::UIntTy ; return UINT; }
				249	long { llvmAsmlval.PrimType = Type::LongTy ; return LONG; }
				250	ulong { llvmAsmlval.PrimType = Type::ULongTy ; return ULONG; }
				251	float { llvmAsmlval.PrimType = Type::FloatTy ; return FLOAT; }
				252	double { llvmAsmlval.PrimType = Type::DoubleTy; return DOUBLE; }
				253	label { llvmAsmlval.PrimType = Type::LabelTy ; return LABEL; }
				254	type { return TYPE; }
				255	opaque { return OPAQUE; }
				256
				257	add { RET_TOK(BinaryOpVal, Add, ADD); }
				258	sub { RET_TOK(BinaryOpVal, Sub, SUB); }
				259	mul { RET_TOK(BinaryOpVal, Mul, MUL); }
Reid Spencer	3ed469c	2006-11-02 20:25:50 +0000	[diff] [blame]	260	div { RET_TOK_OBSOLETE(BinaryOpVal, UDiv, UDIV); }
				261	udiv { RET_TOK(BinaryOpVal, UDiv, UDIV); }
				262	sdiv { RET_TOK(BinaryOpVal, SDiv, SDIV); }
				263	fdiv { RET_TOK(BinaryOpVal, FDiv, FDIV); }
				264	rem { RET_TOK_OBSOLETE(BinaryOpVal, URem, UREM); }
				265	urem { RET_TOK(BinaryOpVal, URem, UREM); }
				266	srem { RET_TOK(BinaryOpVal, SRem, SREM); }
				267	frem { RET_TOK(BinaryOpVal, FRem, FREM); }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	268	and { RET_TOK(BinaryOpVal, And, AND); }
				269	or { RET_TOK(BinaryOpVal, Or , OR ); }
				270	xor { RET_TOK(BinaryOpVal, Xor, XOR); }
				271	setne { RET_TOK(BinaryOpVal, SetNE, SETNE); }
				272	seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
				273	setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); }
				274	setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); }
				275	setle { RET_TOK(BinaryOpVal, SetLE, SETLE); }
				276	setge { RET_TOK(BinaryOpVal, SetGE, SETGE); }
				277
				278	phi { RET_TOK(OtherOpVal, PHI, PHI_TOK); }
				279	call { RET_TOK(OtherOpVal, Call, CALL); }
				280	cast { RET_TOK(OtherOpVal, Cast, CAST); }
				281	select { RET_TOK(OtherOpVal, Select, SELECT); }
				282	shl { RET_TOK(OtherOpVal, Shl, SHL); }
				283	shr { RET_TOK(OtherOpVal, Shr, SHR); }
				284	vanext { return VANEXT_old; }
				285	vaarg { return VAARG_old; }
				286	va_arg { RET_TOK(OtherOpVal, VAArg , VAARG); }
				287	ret { RET_TOK(TermOpVal, Ret, RET); }
				288	br { RET_TOK(TermOpVal, Br, BR); }
				289	switch { RET_TOK(TermOpVal, Switch, SWITCH); }
				290	invoke { RET_TOK(TermOpVal, Invoke, INVOKE); }
				291	unwind { RET_TOK(TermOpVal, Unwind, UNWIND); }
				292	unreachable { RET_TOK(TermOpVal, Unreachable, UNREACHABLE); }
				293
				294	malloc { RET_TOK(MemOpVal, Malloc, MALLOC); }
				295	alloca { RET_TOK(MemOpVal, Alloca, ALLOCA); }
				296	free { RET_TOK(MemOpVal, Free, FREE); }
				297	load { RET_TOK(MemOpVal, Load, LOAD); }
				298	store { RET_TOK(MemOpVal, Store, STORE); }
				299	getelementptr { RET_TOK(MemOpVal, GetElementPtr, GETELEMENTPTR); }
				300
				301	extractelement { RET_TOK(OtherOpVal, ExtractElement, EXTRACTELEMENT); }
				302	insertelement { RET_TOK(OtherOpVal, InsertElement, INSERTELEMENT); }
Chris Lattner	d5efe84	2006-04-08 01:18:56 +0000	[diff] [blame]	303	shufflevector { RET_TOK(OtherOpVal, ShuffleVector, SHUFFLEVECTOR); }
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	304
				305
				306	{VarID} {
				307	UnEscapeLexed(yytext+1);
				308	llvmAsmlval.StrVal = strdup(yytext+1); // Skip %
				309	return VAR_ID;
				310	}
				311	{Label} {
				312	yytext[strlen(yytext)-1] = 0; // nuke colon
				313	UnEscapeLexed(yytext);
				314	llvmAsmlval.StrVal = strdup(yytext);
				315	return LABELSTR;
				316	}
				317	{QuoteLabel} {
				318	yytext[strlen(yytext)-2] = 0; // nuke colon, end quote
				319	UnEscapeLexed(yytext+1);
				320	llvmAsmlval.StrVal = strdup(yytext+1);
				321	return LABELSTR;
				322	}
				323
				324	{StringConstant} { // Note that we cannot unescape a string constant here! The
				325	// string constant might contain a \00 which would not be
				326	// understood by the string stuff. It is valid to make a
				327	// [sbyte] c"Hello World\00" constant, for example.
				328	//
				329	yytext[strlen(yytext)-1] = 0; // nuke end quote
				330	llvmAsmlval.StrVal = strdup(yytext+1); // Nuke start quote
				331	return STRINGCONSTANT;
				332	}
				333
				334
				335	{PInteger} { llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; }
				336	{NInteger} {
				337	uint64_t Val = atoull(yytext+1);
				338	// +1: we have bigger negative range
				339	if (Val > (uint64_t)INT64_MAX+1)
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	340	GenerateError("Constant too large for signed 64 bits!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	341	llvmAsmlval.SInt64Val = -Val;
				342	return ESINT64VAL;
				343	}
				344	{HexIntConstant} {
				345	llvmAsmlval.UInt64Val = HexIntToVal(yytext+3);
				346	return yytext[0] == 's' ? ESINT64VAL : EUINT64VAL;
				347	}
				348
				349	{EPInteger} {
				350	uint64_t Val = atoull(yytext+1);
				351	if ((unsigned)Val != Val)
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	352	GenerateError("Invalid value number (too large)!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	353	llvmAsmlval.UIntVal = unsigned(Val);
				354	return UINTVAL;
				355	}
				356	{ENInteger} {
				357	uint64_t Val = atoull(yytext+2);
				358	// +1: we have bigger negative range
				359	if (Val > (uint64_t)INT32_MAX+1)
Reid Spencer	61c83e0	2006-08-18 08:43:06 +0000	[diff] [blame]	360	GenerateError("Constant too large for signed 32 bits!");
Chris Lattner	32eecb0	2006-02-14 05:14:46 +0000	[diff] [blame]	361	llvmAsmlval.SIntVal = (int)-Val;
				362	return SINTVAL;
				363	}
				364
				365	{FPConstant} { llvmAsmlval.FPVal = atof(yytext); return FPVAL; }
				366	{HexFPConstant} { llvmAsmlval.FPVal = HexToFP(yytext); return FPVAL; }
				367
				368	<<EOF>> {
				369	/* Make sure to free the internal buffers for flex when we are
				370	* done reading our input!
				371	*/
				372	yy_delete_buffer(YY_CURRENT_BUFFER);
				373	return EOF;
				374	}
				375
				376	[ \r\t\n] { /* Ignore whitespace */ }
				377	. { return yytext[0]; }
				378
				379	%%