Blame - lib/AsmParser/LLLexer.cpp - fp2-dev/platform/external/llvm

blob: 27798be618e7f13199163ef1749516f74695a07d [file] [log] [blame]

Chris Lattner	8e3a8e0	2007-11-18 08:46:26 +0000	[diff] [blame]	1	//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for .ll files.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "LLLexer.h"
				15	#include "ParserInternals.h"
				16	#include "llvm/Support/MemoryBuffer.h"
				17
				18	#include <list>
				19	#include "llvmAsmParser.h"
				20	using namespace llvm;
				21
				22	//===----------------------------------------------------------------------===//
				23	// Helper functions.
				24	//===----------------------------------------------------------------------===//
				25
				26	// atoull - Convert an ascii string of decimal digits into the unsigned long
				27	// long representation... this does not have to do input error checking,
				28	// because we know that the input will be matched by a suitable regex...
				29	//
				30	static uint64_t atoull(const char Buffer, const char End) {
				31	uint64_t Result = 0;
				32	for (; Buffer != End; Buffer++) {
				33	uint64_t OldRes = Result;
				34	Result *= 10;
				35	Result += *Buffer-'0';
				36	if (Result < OldRes) { // Uh, oh, overflow detected!!!
				37	GenerateError("constant bigger than 64 bits detected!");
				38	return 0;
				39	}
				40	}
				41	return Result;
				42	}
				43
				44	static uint64_t HexIntToVal(const char Buffer, const char End) {
				45	uint64_t Result = 0;
				46	for (; Buffer != End; ++Buffer) {
				47	uint64_t OldRes = Result;
				48	Result *= 16;
				49	char C = *Buffer;
				50	if (C >= '0' && C <= '9')
				51	Result += C-'0';
				52	else if (C >= 'A' && C <= 'F')
				53	Result += C-'A'+10;
				54	else if (C >= 'a' && C <= 'f')
				55	Result += C-'a'+10;
				56
				57	if (Result < OldRes) { // Uh, oh, overflow detected!!!
				58	GenerateError("constant bigger than 64 bits detected!");
				59	return 0;
				60	}
				61	}
				62	return Result;
				63	}
				64
				65	// HexToFP - Convert the ascii string in hexadecimal format to the floating
				66	// point representation of it.
				67	//
				68	static double HexToFP(const char Buffer, const char End) {
				69	return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double
				70	}
				71
				72	static void HexToIntPair(const char Buffer, const char End, uint64_t Pair[2]){
				73	Pair[0] = 0;
				74	for (int i=0; i<16; i++, Buffer++) {
				75	assert(Buffer != End);
				76	Pair[0] *= 16;
				77	char C = *Buffer;
				78	if (C >= '0' && C <= '9')
				79	Pair[0] += C-'0';
				80	else if (C >= 'A' && C <= 'F')
				81	Pair[0] += C-'A'+10;
				82	else if (C >= 'a' && C <= 'f')
				83	Pair[0] += C-'a'+10;
				84	}
				85	Pair[1] = 0;
				86	for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
				87	Pair[1] *= 16;
				88	char C = *Buffer;
				89	if (C >= '0' && C <= '9')
				90	Pair[1] += C-'0';
				91	else if (C >= 'A' && C <= 'F')
				92	Pair[1] += C-'A'+10;
				93	else if (C >= 'a' && C <= 'f')
				94	Pair[1] += C-'a'+10;
				95	}
Chris Lattner	d343c6b	2007-11-18 18:25:18 +0000	[diff] [blame]	96	if (Buffer != End)
Chris Lattner	8e3a8e0	2007-11-18 08:46:26 +0000	[diff] [blame]	97	GenerateError("constant bigger than 128 bits detected!");
				98	}
				99
				100	// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
				101	// appropriate character.
				102	static void UnEscapeLexed(std::string &Str) {
				103	if (Str.empty()) return;
				104
				105	char Buffer = &Str[0], EndBuffer = Buffer+Str.size();
				106	char *BOut = Buffer;
				107	for (char *BIn = Buffer; BIn != EndBuffer; ) {
				108	if (BIn[0] == '\\') {
				109	if (BIn < EndBuffer-1 && BIn[1] == '\\') {
				110	*BOut++ = '\\'; // Two \ becomes one
				111	BIn += 2;
				112	} else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
				113	char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
				114	*BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
				115	BIn[3] = Tmp; // Restore character
				116	BIn += 3; // Skip over handled chars
				117	++BOut;
				118	} else {
				119	BOut++ = BIn++;
				120	}
				121	} else {
				122	BOut++ = BIn++;
				123	}
				124	}
				125	Str.resize(BOut-Buffer);
				126	}
				127
				128	/// isLabelChar - Return true for [-a-zA-Z$._0-9].
				129	static bool isLabelChar(char C) {
				130	return isalnum(C) \|\| C == '-' \|\| C == '$' \|\| C == '.' \|\| C == '_';
				131	}
				132
				133
				134	/// isLabelTail - Return true if this pointer points to a valid end of a label.
				135	static const char isLabelTail(const char CurPtr) {
				136	while (1) {
				137	if (CurPtr[0] == ':') return CurPtr+1;
				138	if (!isLabelChar(CurPtr[0])) return 0;
				139	++CurPtr;
				140	}
				141	}
				142
				143
				144
				145	//===----------------------------------------------------------------------===//
				146	// Lexer definition.
				147	//===----------------------------------------------------------------------===//
				148
				149	// FIXME: REMOVE THIS.
				150	#define YYEOF 0
				151	#define YYERROR -2
				152
				153	LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) {
				154	CurPtr = CurBuf->getBufferStart();
				155	}
				156
				157	std::string LLLexer::getFilename() const {
				158	return CurBuf->getBufferIdentifier();
				159	}
				160
				161	int LLLexer::getNextChar() {
				162	char CurChar = *CurPtr++;
				163	switch (CurChar) {
				164	default: return (unsigned char)CurChar;
				165	case 0:
				166	// A nul character in the stream is either the end of the current buffer or
				167	// a random nul in the file. Disambiguate that here.
				168	if (CurPtr-1 != CurBuf->getBufferEnd())
				169	return 0; // Just whitespace.
				170
				171	// Otherwise, return end of file.
				172	--CurPtr; // Another call to lex will return EOF again.
				173	return EOF;
				174	case '\n':
				175	case '\r':
				176	// Handle the newline character by ignoring it and incrementing the line
				177	// count. However, be careful about 'dos style' files with \n\r in them.
				178	// Only treat a \n\r or \r\n as a single line.
				179	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				180	*CurPtr != CurChar)
				181	++CurPtr; // Eat the two char newline sequence.
				182
				183	++CurLineNo;
				184	return '\n';
				185	}
				186	}
				187
				188
				189	int LLLexer::LexToken() {
				190	TokStart = CurPtr;
				191
				192	int CurChar = getNextChar();
				193
				194	switch (CurChar) {
				195	default:
				196	// Handle letters: [a-zA-Z_]
				197	if (isalpha(CurChar) \|\| CurChar == '_')
				198	return LexIdentifier();
				199
				200	return CurChar;
				201	case EOF: return YYEOF;
				202	case 0:
				203	case ' ':
				204	case '\t':
				205	case '\n':
				206	case '\r':
				207	// Ignore whitespace.
				208	return LexToken();
				209	case '+': return LexPositive();
				210	case '@': return LexAt();
				211	case '%': return LexPercent();
				212	case '"': return LexQuote();
				213	case '.':
				214	if (const char *Ptr = isLabelTail(CurPtr)) {
				215	CurPtr = Ptr;
				216	llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
				217	return LABELSTR;
				218	}
				219	if (CurPtr[0] == '.' && CurPtr[1] == '.') {
				220	CurPtr += 2;
				221	return DOTDOTDOT;
				222	}
				223	return '.';
				224	case '$':
				225	if (const char *Ptr = isLabelTail(CurPtr)) {
				226	CurPtr = Ptr;
				227	llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1);
				228	return LABELSTR;
				229	}
				230	return '$';
				231	case ';':
				232	SkipLineComment();
				233	return LexToken();
				234	case '0': case '1': case '2': case '3': case '4':
				235	case '5': case '6': case '7': case '8': case '9':
				236	case '-':
				237	return LexDigitOrNegative();
				238	}
				239	}
				240
				241	void LLLexer::SkipLineComment() {
				242	while (1) {
				243	if (CurPtr[0] == '\n' \|\| CurPtr[0] == '\r' \|\| getNextChar() == EOF)
				244	return;
				245	}
				246	}
				247
				248	/// LexAt - Lex all tokens that start with an @ character:
				249	/// AtStringConstant @\"[^\"]*\"
				250	/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]*
				251	/// GlobalVarID @[0-9]+
				252	int LLLexer::LexAt() {
				253	// Handle AtStringConstant: @\"[^\"]*\"
				254	if (CurPtr[0] == '"') {
				255	++CurPtr;
				256
				257	while (1) {
				258	int CurChar = getNextChar();
				259
				260	if (CurChar == EOF) {
				261	GenerateError("End of file in global variable name");
				262	return YYERROR;
				263	}
				264	if (CurChar == '"') {
				265	llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
				266	UnEscapeLexed(*llvmAsmlval.StrVal);
				267	return ATSTRINGCONSTANT;
				268	}
				269	}
				270	}
				271
				272	// Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
				273	if (isalpha(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				274	CurPtr[0] == '.' \|\| CurPtr[0] == '_') {
				275	++CurPtr;
				276	while (isalnum(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				277	CurPtr[0] == '.' \|\| CurPtr[0] == '_')
				278	++CurPtr;
				279
				280	llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @
				281	return GLOBALVAR;
				282	}
				283
				284	// Handle GlobalVarID: @[0-9]+
				285	if (isdigit(CurPtr[0])) {
				286	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
				287
				288	uint64_t Val = atoull(TokStart+1, CurPtr);
				289	if ((unsigned)Val != Val)
				290	GenerateError("Invalid value number (too large)!");
				291	llvmAsmlval.UIntVal = unsigned(Val);
				292	return GLOBALVAL_ID;
				293	}
				294
				295	return '@';
				296	}
				297
				298
				299	/// LexPercent - Lex all tokens that start with a % character:
				300	/// PctStringConstant %\"[^\"]*\"
				301	/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]*
				302	/// LocalVarID %[0-9]+
				303	int LLLexer::LexPercent() {
				304	// Handle PctStringConstant: %\"[^\"]*\"
				305	if (CurPtr[0] == '"') {
				306	++CurPtr;
				307
				308	while (1) {
				309	int CurChar = getNextChar();
				310
				311	if (CurChar == EOF) {
				312	GenerateError("End of file in local variable name");
				313	return YYERROR;
				314	}
				315	if (CurChar == '"') {
				316	llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1);
				317	UnEscapeLexed(*llvmAsmlval.StrVal);
				318	return PCTSTRINGCONSTANT;
				319	}
				320	}
				321	}
				322
				323	// Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
				324	if (isalpha(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				325	CurPtr[0] == '.' \|\| CurPtr[0] == '_') {
				326	++CurPtr;
				327	while (isalnum(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				328	CurPtr[0] == '.' \|\| CurPtr[0] == '_')
				329	++CurPtr;
				330
				331	llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip %
				332	return LOCALVAR;
				333	}
				334
				335	// Handle LocalVarID: %[0-9]+
				336	if (isdigit(CurPtr[0])) {
				337	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
				338
				339	uint64_t Val = atoull(TokStart+1, CurPtr);
				340	if ((unsigned)Val != Val)
				341	GenerateError("Invalid value number (too large)!");
				342	llvmAsmlval.UIntVal = unsigned(Val);
				343	return LOCALVAL_ID;
				344	}
				345
				346	return '%';
				347	}
				348
				349	/// LexQuote - Lex all tokens that start with a " character:
				350	/// QuoteLabel "[^"]+":
				351	/// StringConstant "[^"]*"
				352	int LLLexer::LexQuote() {
				353	while (1) {
				354	int CurChar = getNextChar();
				355
				356	if (CurChar == EOF) {
				357	GenerateError("End of file in quoted string");
				358	return YYERROR;
				359	}
				360
				361	if (CurChar != '"') continue;
				362
				363	if (CurPtr[0] != ':') {
				364	llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1);
				365	UnEscapeLexed(*llvmAsmlval.StrVal);
				366	return STRINGCONSTANT;
				367	}
				368
				369	++CurPtr;
				370	llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2);
				371	UnEscapeLexed(*llvmAsmlval.StrVal);
				372	return LABELSTR;
				373	}
				374	}
				375
				376	static bool JustWhitespaceNewLine(const char *&Ptr) {
				377	const char *ThisPtr = Ptr;
				378	while (ThisPtr == ' ' \|\| ThisPtr == '\t')
				379	++ThisPtr;
				380	if (ThisPtr == '\n' \|\| ThisPtr == '\r') {
				381	Ptr = ThisPtr;
				382	return true;
				383	}
				384	return false;
				385	}
				386
				387
				388	/// LexIdentifier: Handle several related productions:
				389	/// Label [-a-zA-Z$._0-9]+:
				390	/// IntegerType i[0-9]+
				391	/// Keyword sdiv, float, ...
				392	/// HexIntConstant [us]0x[0-9A-Fa-f]+
				393	int LLLexer::LexIdentifier() {
				394	const char *StartChar = CurPtr;
				395	const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
				396	const char *KeywordEnd = 0;
				397
				398	for (; isLabelChar(*CurPtr); ++CurPtr) {
				399	// If we decide this is an integer, remember the end of the sequence.
				400	if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
				401	if (!KeywordEnd && !isalnum(CurPtr) && CurPtr != '_') KeywordEnd = CurPtr;
				402	}
				403
				404	// If we stopped due to a colon, this really is a label.
				405	if (*CurPtr == ':') {
				406	llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++);
				407	return LABELSTR;
				408	}
				409
				410	// Otherwise, this wasn't a label. If this was valid as an integer type,
				411	// return it.
				412	if (IntEnd == 0) IntEnd = CurPtr;
				413	if (IntEnd != StartChar) {
				414	CurPtr = IntEnd;
				415	uint64_t NumBits = atoull(StartChar, CurPtr);
				416	if (NumBits < IntegerType::MIN_INT_BITS \|\|
				417	NumBits > IntegerType::MAX_INT_BITS) {
				418	GenerateError("Bitwidth for integer type out of range!");
				419	return YYERROR;
				420	}
				421	const Type* Ty = IntegerType::get(NumBits);
				422	llvmAsmlval.PrimType = Ty;
				423	return INTTYPE;
				424	}
				425
				426	// Otherwise, this was a letter sequence. See which keyword this is.
				427	if (KeywordEnd == 0) KeywordEnd = CurPtr;
				428	CurPtr = KeywordEnd;
				429	--StartChar;
				430	unsigned Len = CurPtr-StartChar;
				431	#define KEYWORD(STR, TOK) \
				432	if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK;
				433
				434	KEYWORD("begin", BEGINTOK);
				435	KEYWORD("end", ENDTOK);
				436	KEYWORD("true", TRUETOK);
				437	KEYWORD("false", FALSETOK);
				438	KEYWORD("declare", DECLARE);
				439	KEYWORD("define", DEFINE);
				440	KEYWORD("global", GLOBAL);
				441	KEYWORD("constant", CONSTANT);
				442
				443	KEYWORD("internal", INTERNAL);
				444	KEYWORD("linkonce", LINKONCE);
				445	KEYWORD("weak", WEAK);
				446	KEYWORD("appending", APPENDING);
				447	KEYWORD("dllimport", DLLIMPORT);
				448	KEYWORD("dllexport", DLLEXPORT);
				449	KEYWORD("hidden", HIDDEN);
				450	KEYWORD("protected", PROTECTED);
				451	KEYWORD("extern_weak", EXTERN_WEAK);
				452	KEYWORD("external", EXTERNAL);
				453	KEYWORD("thread_local", THREAD_LOCAL);
				454	KEYWORD("zeroinitializer", ZEROINITIALIZER);
				455	KEYWORD("undef", UNDEF);
				456	KEYWORD("null", NULL_TOK);
				457	KEYWORD("to", TO);
				458	KEYWORD("tail", TAIL);
				459	KEYWORD("target", TARGET);
				460	KEYWORD("triple", TRIPLE);
				461	KEYWORD("deplibs", DEPLIBS);
				462	KEYWORD("datalayout", DATALAYOUT);
				463	KEYWORD("volatile", VOLATILE);
				464	KEYWORD("align", ALIGN);
				465	KEYWORD("section", SECTION);
				466	KEYWORD("alias", ALIAS);
				467	KEYWORD("module", MODULE);
				468	KEYWORD("asm", ASM_TOK);
				469	KEYWORD("sideeffect", SIDEEFFECT);
				470
				471	KEYWORD("cc", CC_TOK);
				472	KEYWORD("ccc", CCC_TOK);
				473	KEYWORD("fastcc", FASTCC_TOK);
				474	KEYWORD("coldcc", COLDCC_TOK);
				475	KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK);
				476	KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK);
				477
				478	KEYWORD("signext", SIGNEXT);
				479	KEYWORD("zeroext", ZEROEXT);
				480	KEYWORD("inreg", INREG);
				481	KEYWORD("sret", SRET);
				482	KEYWORD("nounwind", NOUNWIND);
				483	KEYWORD("noreturn", NORETURN);
				484	KEYWORD("noalias", NOALIAS);
				485	KEYWORD("byval", BYVAL);
				486	KEYWORD("nest", NEST);
				487	KEYWORD("pure", PURE);
				488	KEYWORD("const", CONST);
				489
				490	KEYWORD("type", TYPE);
				491	KEYWORD("opaque", OPAQUE);
				492
				493	KEYWORD("eq" , EQ);
				494	KEYWORD("ne" , NE);
				495	KEYWORD("slt", SLT);
				496	KEYWORD("sgt", SGT);
				497	KEYWORD("sle", SLE);
				498	KEYWORD("sge", SGE);
				499	KEYWORD("ult", ULT);
				500	KEYWORD("ugt", UGT);
				501	KEYWORD("ule", ULE);
				502	KEYWORD("uge", UGE);
				503	KEYWORD("oeq", OEQ);
				504	KEYWORD("one", ONE);
				505	KEYWORD("olt", OLT);
				506	KEYWORD("ogt", OGT);
				507	KEYWORD("ole", OLE);
				508	KEYWORD("oge", OGE);
				509	KEYWORD("ord", ORD);
				510	KEYWORD("uno", UNO);
				511	KEYWORD("ueq", UEQ);
				512	KEYWORD("une", UNE);
				513	#undef KEYWORD
				514
				515	// Keywords for types.
				516	#define TYPEKEYWORD(STR, LLVMTY, TOK) \
				517	if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
				518	llvmAsmlval.PrimType = LLVMTY; return TOK; }
				519	TYPEKEYWORD("void", Type::VoidTy, VOID);
				520	TYPEKEYWORD("float", Type::FloatTy, FLOAT);
				521	TYPEKEYWORD("double", Type::DoubleTy, DOUBLE);
				522	TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80);
				523	TYPEKEYWORD("fp128", Type::FP128Ty, FP128);
				524	TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128);
				525	TYPEKEYWORD("label", Type::LabelTy, LABEL);
				526	#undef TYPEKEYWORD
				527
				528	// Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
				529	// to avoid conflicting with the sext/zext instructions, below.
				530	if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
				531	// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
				532	if (JustWhitespaceNewLine(CurPtr))
				533	return SIGNEXT;
				534	} else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
				535	// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
				536	if (JustWhitespaceNewLine(CurPtr))
				537	return ZEROEXT;
				538	}
				539
				540	// Keywords for instructions.
				541	#define INSTKEYWORD(STR, type, Enum, TOK) \
				542	if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
				543	llvmAsmlval.type = Instruction::Enum; return TOK; }
				544
				545	INSTKEYWORD("add", BinaryOpVal, Add, ADD);
				546	INSTKEYWORD("sub", BinaryOpVal, Sub, SUB);
				547	INSTKEYWORD("mul", BinaryOpVal, Mul, MUL);
				548	INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV);
				549	INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV);
				550	INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV);
				551	INSTKEYWORD("urem", BinaryOpVal, URem, UREM);
				552	INSTKEYWORD("srem", BinaryOpVal, SRem, SREM);
				553	INSTKEYWORD("frem", BinaryOpVal, FRem, FREM);
				554	INSTKEYWORD("shl", BinaryOpVal, Shl, SHL);
				555	INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR);
				556	INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR);
				557	INSTKEYWORD("and", BinaryOpVal, And, AND);
				558	INSTKEYWORD("or", BinaryOpVal, Or , OR );
				559	INSTKEYWORD("xor", BinaryOpVal, Xor, XOR);
				560	INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP);
				561	INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP);
				562
				563	INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK);
				564	INSTKEYWORD("call", OtherOpVal, Call, CALL);
				565	INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC);
				566	INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT);
				567	INSTKEYWORD("sext", CastOpVal, SExt, SEXT);
				568	INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC);
				569	INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT);
				570	INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP);
				571	INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP);
				572	INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI);
				573	INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI);
				574	INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR);
				575	INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT);
				576	INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST);
				577	INSTKEYWORD("select", OtherOpVal, Select, SELECT);
				578	INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG);
				579	INSTKEYWORD("ret", TermOpVal, Ret, RET);
				580	INSTKEYWORD("br", TermOpVal, Br, BR);
				581	INSTKEYWORD("switch", TermOpVal, Switch, SWITCH);
				582	INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE);
				583	INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND);
				584	INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE);
				585
				586	INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC);
				587	INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA);
				588	INSTKEYWORD("free", MemOpVal, Free, FREE);
				589	INSTKEYWORD("load", MemOpVal, Load, LOAD);
				590	INSTKEYWORD("store", MemOpVal, Store, STORE);
				591	INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR);
				592
				593	INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT);
				594	INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT);
				595	INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR);
				596	#undef INSTKEYWORD
				597
				598	// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
				599	// the CFE to avoid forcing it to deal with 64-bit numbers.
				600	if ((TokStart[0] == 'u' \|\| TokStart[0] == 's') &&
				601	TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
				602	int len = CurPtr-TokStart-3;
				603	uint32_t bits = len * 4;
				604	APInt Tmp(bits, TokStart+3, len, 16);
				605	uint32_t activeBits = Tmp.getActiveBits();
				606	if (activeBits > 0 && activeBits < bits)
				607	Tmp.trunc(activeBits);
				608	if (Tmp.getBitWidth() > 64) {
				609	llvmAsmlval.APIntVal = new APInt(Tmp);
				610	return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL;
				611	} else if (TokStart[0] == 's') {
				612	llvmAsmlval.SInt64Val = Tmp.getSExtValue();
				613	return ESINT64VAL;
				614	} else {
				615	llvmAsmlval.UInt64Val = Tmp.getZExtValue();
				616	return EUINT64VAL;
				617	}
				618	}
				619
Chris Lattner	4ce0df6	2007-11-18 18:43:24 +0000	[diff] [blame^]	620	// If this is "cc1234", return this as just "cc".
Chris Lattner	8e3a8e0	2007-11-18 08:46:26 +0000	[diff] [blame]	621	if (TokStart[0] == 'c' && TokStart[1] == 'c') {
				622	CurPtr = TokStart+2;
				623	return CC_TOK;
				624	}
				625
Chris Lattner	4ce0df6	2007-11-18 18:43:24 +0000	[diff] [blame^]	626	// If this starts with "call", return it as CALL. This is to support old
				627	// broken .ll files. FIXME: remove this with LLVM 3.0.
				628	if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
				629	CurPtr = TokStart+4;
				630	llvmAsmlval.OtherOpVal = Instruction::Call;
				631	return CALL;
				632	}
				633
Chris Lattner	8e3a8e0	2007-11-18 08:46:26 +0000	[diff] [blame]	634	// Finally, if this isn't known, return just a single character.
				635	CurPtr = TokStart+1;
				636	return TokStart[0];
				637	}
				638
				639
				640	/// Lex0x: Handle productions that start with 0x, knowing that it matches and
				641	/// that this is not a label:
				642	/// HexFPConstant 0x[0-9A-Fa-f]+
				643	/// HexFP80Constant 0xK[0-9A-Fa-f]+
				644	/// HexFP128Constant 0xL[0-9A-Fa-f]+
				645	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
				646	int LLLexer::Lex0x() {
				647	CurPtr = TokStart + 2;
				648
				649	char Kind;
				650	if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
				651	Kind = *CurPtr++;
				652	} else {
				653	Kind = 'J';
				654	}
				655
				656	if (!isxdigit(CurPtr[0])) {
				657	// Bad token, return it as just zero.
				658	CurPtr = TokStart+1;
				659	return '0';
				660	}
				661
				662	while (isxdigit(CurPtr[0]))
				663	++CurPtr;
				664
				665	if (Kind == 'J') {
				666	// HexFPConstant - Floating point constant represented in IEEE format as a
				667	// hexadecimal number for when exponential notation is not precise enough.
				668	// Float and double only.
				669	llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr));
				670	return FPVAL;
				671	}
				672
				673	uint64_t Pair[2];
				674	HexToIntPair(TokStart+3, CurPtr, Pair);
				675	switch (Kind) {
				676	default: assert(0 && "Unknown kind!");
				677	case 'K':
				678	// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
				679	llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair));
				680	return FPVAL;
				681	case 'L':
				682	// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
				683	llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true);
				684	return FPVAL;
				685	case 'M':
				686	// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
				687	llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair));
				688	return FPVAL;
				689	}
				690	}
				691
				692	/// LexIdentifier: Handle several related productions:
				693	/// Label [-a-zA-Z$._0-9]+:
				694	/// NInteger -[0-9]+
				695	/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
				696	/// PInteger [0-9]+
				697	/// HexFPConstant 0x[0-9A-Fa-f]+
				698	/// HexFP80Constant 0xK[0-9A-Fa-f]+
				699	/// HexFP128Constant 0xL[0-9A-Fa-f]+
				700	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
				701	int LLLexer::LexDigitOrNegative() {
				702	// If the letter after the negative is a number, this is probably a label.
				703	if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
				704	// Okay, this is not a number after the -, it's probably a label.
				705	if (const char *End = isLabelTail(CurPtr)) {
				706	llvmAsmlval.StrVal = new std::string(TokStart, End-1);
				707	CurPtr = End;
				708	return LABELSTR;
				709	}
				710
				711	return CurPtr[-1];
				712	}
				713
				714	// At this point, it is either a label, int or fp constant.
				715
				716	// Skip digits, we have at least one.
				717	for (; isdigit(CurPtr[0]); ++CurPtr);
				718
				719	// Check to see if this really is a label afterall, e.g. "-1:".
				720	if (isLabelChar(CurPtr[0]) \|\| CurPtr[0] == ':') {
				721	if (const char *End = isLabelTail(CurPtr)) {
				722	llvmAsmlval.StrVal = new std::string(TokStart, End-1);
				723	CurPtr = End;
				724	return LABELSTR;
				725	}
				726	}
				727
				728	// If the next character is a '.', then it is a fp value, otherwise its
				729	// integer.
				730	if (CurPtr[0] != '.') {
				731	if (TokStart[0] == '0' && TokStart[1] == 'x')
				732	return Lex0x();
				733	unsigned Len = CurPtr-TokStart;
				734	uint32_t numBits = ((Len * 64) / 19) + 2;
				735	APInt Tmp(numBits, TokStart, Len, 10);
				736	if (TokStart[0] == '-') {
				737	uint32_t minBits = Tmp.getMinSignedBits();
				738	if (minBits > 0 && minBits < numBits)
				739	Tmp.trunc(minBits);
				740	if (Tmp.getBitWidth() > 64) {
				741	llvmAsmlval.APIntVal = new APInt(Tmp);
				742	return ESAPINTVAL;
				743	} else {
				744	llvmAsmlval.SInt64Val = Tmp.getSExtValue();
				745	return ESINT64VAL;
				746	}
				747	} else {
				748	uint32_t activeBits = Tmp.getActiveBits();
				749	if (activeBits > 0 && activeBits < numBits)
				750	Tmp.trunc(activeBits);
				751	if (Tmp.getBitWidth() > 64) {
				752	llvmAsmlval.APIntVal = new APInt(Tmp);
				753	return EUAPINTVAL;
				754	} else {
				755	llvmAsmlval.UInt64Val = Tmp.getZExtValue();
				756	return EUINT64VAL;
				757	}
				758	}
				759	}
				760
				761	++CurPtr;
				762
				763	// Skip over [0-9]*([eE][-+]?[0-9]+)?
				764	while (isdigit(CurPtr[0])) ++CurPtr;
				765
				766	if (CurPtr[0] == 'e' \|\| CurPtr[0] == 'E') {
				767	if (isdigit(CurPtr[1]) \|\|
				768	((CurPtr[1] == '-' \|\| CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
				769	CurPtr += 2;
				770	while (isdigit(CurPtr[0])) ++CurPtr;
				771	}
				772	}
				773
				774	llvmAsmlval.FPVal = new APFloat(atof(TokStart));
				775	return FPVAL;
				776	}
				777
				778	/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
				779	int LLLexer::LexPositive() {
				780	// If the letter after the negative is a number, this is probably not a
				781	// label.
				782	if (!isdigit(CurPtr[0]))
				783	return CurPtr[-1];
				784
				785	// Skip digits.
				786	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr);
				787
				788	// At this point, we need a '.'.
				789	if (CurPtr[0] != '.') {
				790	CurPtr = TokStart+1;
				791	return TokStart[0];
				792	}
				793
				794	++CurPtr;
				795
				796	// Skip over [0-9]*([eE][-+]?[0-9]+)?
				797	while (isdigit(CurPtr[0])) ++CurPtr;
				798
				799	if (CurPtr[0] == 'e' \|\| CurPtr[0] == 'E') {
				800	if (isdigit(CurPtr[1]) \|\|
				801	((CurPtr[1] == '-' \|\| CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
				802	CurPtr += 2;
				803	while (isdigit(CurPtr[0])) ++CurPtr;
				804	}
				805	}
				806
				807	llvmAsmlval.FPVal = new APFloat(atof(TokStart));
				808	return FPVAL;
				809	}
				810
				811
				812	//===----------------------------------------------------------------------===//
				813	// Define the interface to this file.
				814	//===----------------------------------------------------------------------===//
				815
				816	static LLLexer *TheLexer;
				817
				818	void InitLLLexer(llvm::MemoryBuffer *MB) {
				819	assert(TheLexer == 0 && "LL Lexer isn't reentrant yet");
				820	TheLexer = new LLLexer(MB);
				821	}
				822
				823	int llvmAsmlex() {
				824	return TheLexer->LexToken();
				825	}
				826	const char *LLLgetTokenStart() { return TheLexer->getTokStart(); }
				827	unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); }
				828	std::string LLLgetFilename() { return TheLexer->getFilename(); }
				829	unsigned LLLgetLineNo() { return TheLexer->getLineNo(); }
				830
				831	void FreeLexer() {
				832	delete TheLexer;
				833	TheLexer = 0;
				834	}