Blame - lib/AsmParser/LLLexer.cpp - fp2-dev/platform/external/llvm

blob: 8ad658d858dbe8fc3bc4312e07a8a57f0abdc142 [file] [log] [blame]

Shih-wei Liao	e264f62	2010-02-10 11:10:31 -0800	[diff] [blame^]	1	//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for .ll files.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "LLLexer.h"
				15	#include "llvm/DerivedTypes.h"
				16	#include "llvm/Instruction.h"
				17	#include "llvm/LLVMContext.h"
				18	#include "llvm/Support/ErrorHandling.h"
				19	#include "llvm/Support/MemoryBuffer.h"
				20	#include "llvm/Support/MathExtras.h"
				21	#include "llvm/Support/SourceMgr.h"
				22	#include "llvm/Support/raw_ostream.h"
				23	#include "llvm/Assembly/Parser.h"
				24	#include <cstdio>
				25	#include <cstdlib>
				26	#include <cstring>
				27	using namespace llvm;
				28
				29	bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
				30	ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
				31	return true;
				32	}
				33
				34	//===----------------------------------------------------------------------===//
				35	// Helper functions.
				36	//===----------------------------------------------------------------------===//
				37
				38	// atoull - Convert an ascii string of decimal digits into the unsigned long
				39	// long representation... this does not have to do input error checking,
				40	// because we know that the input will be matched by a suitable regex...
				41	//
				42	uint64_t LLLexer::atoull(const char Buffer, const char End) {
				43	uint64_t Result = 0;
				44	for (; Buffer != End; Buffer++) {
				45	uint64_t OldRes = Result;
				46	Result *= 10;
				47	Result += *Buffer-'0';
				48	if (Result < OldRes) { // Uh, oh, overflow detected!!!
				49	Error("constant bigger than 64 bits detected!");
				50	return 0;
				51	}
				52	}
				53	return Result;
				54	}
				55
				56	uint64_t LLLexer::HexIntToVal(const char Buffer, const char End) {
				57	uint64_t Result = 0;
				58	for (; Buffer != End; ++Buffer) {
				59	uint64_t OldRes = Result;
				60	Result *= 16;
				61	char C = *Buffer;
				62	if (C >= '0' && C <= '9')
				63	Result += C-'0';
				64	else if (C >= 'A' && C <= 'F')
				65	Result += C-'A'+10;
				66	else if (C >= 'a' && C <= 'f')
				67	Result += C-'a'+10;
				68
				69	if (Result < OldRes) { // Uh, oh, overflow detected!!!
				70	Error("constant bigger than 64 bits detected!");
				71	return 0;
				72	}
				73	}
				74	return Result;
				75	}
				76
				77	void LLLexer::HexToIntPair(const char Buffer, const char End,
				78	uint64_t Pair[2]) {
				79	Pair[0] = 0;
				80	for (int i=0; i<16; i++, Buffer++) {
				81	assert(Buffer != End);
				82	Pair[0] *= 16;
				83	char C = *Buffer;
				84	if (C >= '0' && C <= '9')
				85	Pair[0] += C-'0';
				86	else if (C >= 'A' && C <= 'F')
				87	Pair[0] += C-'A'+10;
				88	else if (C >= 'a' && C <= 'f')
				89	Pair[0] += C-'a'+10;
				90	}
				91	Pair[1] = 0;
				92	for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
				93	Pair[1] *= 16;
				94	char C = *Buffer;
				95	if (C >= '0' && C <= '9')
				96	Pair[1] += C-'0';
				97	else if (C >= 'A' && C <= 'F')
				98	Pair[1] += C-'A'+10;
				99	else if (C >= 'a' && C <= 'f')
				100	Pair[1] += C-'a'+10;
				101	}
				102	if (Buffer != End)
				103	Error("constant bigger than 128 bits detected!");
				104	}
				105
				106	/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
				107	/// { low64, high16 } as usual for an APInt.
				108	void LLLexer::FP80HexToIntPair(const char Buffer, const char End,
				109	uint64_t Pair[2]) {
				110	Pair[1] = 0;
				111	for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
				112	assert(Buffer != End);
				113	Pair[1] *= 16;
				114	char C = *Buffer;
				115	if (C >= '0' && C <= '9')
				116	Pair[1] += C-'0';
				117	else if (C >= 'A' && C <= 'F')
				118	Pair[1] += C-'A'+10;
				119	else if (C >= 'a' && C <= 'f')
				120	Pair[1] += C-'a'+10;
				121	}
				122	Pair[0] = 0;
				123	for (int i=0; i<16; i++, Buffer++) {
				124	Pair[0] *= 16;
				125	char C = *Buffer;
				126	if (C >= '0' && C <= '9')
				127	Pair[0] += C-'0';
				128	else if (C >= 'A' && C <= 'F')
				129	Pair[0] += C-'A'+10;
				130	else if (C >= 'a' && C <= 'f')
				131	Pair[0] += C-'a'+10;
				132	}
				133	if (Buffer != End)
				134	Error("constant bigger than 128 bits detected!");
				135	}
				136
				137	// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
				138	// appropriate character.
				139	static void UnEscapeLexed(std::string &Str) {
				140	if (Str.empty()) return;
				141
				142	char Buffer = &Str[0], EndBuffer = Buffer+Str.size();
				143	char *BOut = Buffer;
				144	for (char *BIn = Buffer; BIn != EndBuffer; ) {
				145	if (BIn[0] == '\\') {
				146	if (BIn < EndBuffer-1 && BIn[1] == '\\') {
				147	*BOut++ = '\\'; // Two \ becomes one
				148	BIn += 2;
				149	} else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
				150	char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
				151	*BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
				152	BIn[3] = Tmp; // Restore character
				153	BIn += 3; // Skip over handled chars
				154	++BOut;
				155	} else {
				156	BOut++ = BIn++;
				157	}
				158	} else {
				159	BOut++ = BIn++;
				160	}
				161	}
				162	Str.resize(BOut-Buffer);
				163	}
				164
				165	/// isLabelChar - Return true for [-a-zA-Z$._0-9].
				166	static bool isLabelChar(char C) {
				167	return isalnum(C) \|\| C == '-' \|\| C == '$' \|\| C == '.' \|\| C == '_';
				168	}
				169
				170
				171	/// isLabelTail - Return true if this pointer points to a valid end of a label.
				172	static const char isLabelTail(const char CurPtr) {
				173	while (1) {
				174	if (CurPtr[0] == ':') return CurPtr+1;
				175	if (!isLabelChar(CurPtr[0])) return 0;
				176	++CurPtr;
				177	}
				178	}
				179
				180
				181
				182	//===----------------------------------------------------------------------===//
				183	// Lexer definition.
				184	//===----------------------------------------------------------------------===//
				185
				186	LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
				187	LLVMContext &C)
				188	: CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
				189	CurPtr = CurBuf->getBufferStart();
				190	}
				191
				192	std::string LLLexer::getFilename() const {
				193	return CurBuf->getBufferIdentifier();
				194	}
				195
				196	int LLLexer::getNextChar() {
				197	char CurChar = *CurPtr++;
				198	switch (CurChar) {
				199	default: return (unsigned char)CurChar;
				200	case 0:
				201	// A nul character in the stream is either the end of the current buffer or
				202	// a random nul in the file. Disambiguate that here.
				203	if (CurPtr-1 != CurBuf->getBufferEnd())
				204	return 0; // Just whitespace.
				205
				206	// Otherwise, return end of file.
				207	--CurPtr; // Another call to lex will return EOF again.
				208	return EOF;
				209	}
				210	}
				211
				212
				213	lltok::Kind LLLexer::LexToken() {
				214	TokStart = CurPtr;
				215
				216	int CurChar = getNextChar();
				217	switch (CurChar) {
				218	default:
				219	// Handle letters: [a-zA-Z_]
				220	if (isalpha(CurChar) \|\| CurChar == '_')
				221	return LexIdentifier();
				222
				223	return lltok::Error;
				224	case EOF: return lltok::Eof;
				225	case 0:
				226	case ' ':
				227	case '\t':
				228	case '\n':
				229	case '\r':
				230	// Ignore whitespace.
				231	return LexToken();
				232	case '+': return LexPositive();
				233	case '@': return LexAt();
				234	case '%': return LexPercent();
				235	case '"': return LexQuote();
				236	case '.':
				237	if (const char *Ptr = isLabelTail(CurPtr)) {
				238	CurPtr = Ptr;
				239	StrVal.assign(TokStart, CurPtr-1);
				240	return lltok::LabelStr;
				241	}
				242	if (CurPtr[0] == '.' && CurPtr[1] == '.') {
				243	CurPtr += 2;
				244	return lltok::dotdotdot;
				245	}
				246	return lltok::Error;
				247	case '$':
				248	if (const char *Ptr = isLabelTail(CurPtr)) {
				249	CurPtr = Ptr;
				250	StrVal.assign(TokStart, CurPtr-1);
				251	return lltok::LabelStr;
				252	}
				253	return lltok::Error;
				254	case ';':
				255	SkipLineComment();
				256	return LexToken();
				257	case '!': return LexExclaim();
				258	case '0': case '1': case '2': case '3': case '4':
				259	case '5': case '6': case '7': case '8': case '9':
				260	case '-':
				261	return LexDigitOrNegative();
				262	case '=': return lltok::equal;
				263	case '[': return lltok::lsquare;
				264	case ']': return lltok::rsquare;
				265	case '{': return lltok::lbrace;
				266	case '}': return lltok::rbrace;
				267	case '<': return lltok::less;
				268	case '>': return lltok::greater;
				269	case '(': return lltok::lparen;
				270	case ')': return lltok::rparen;
				271	case ',': return lltok::comma;
				272	case '*': return lltok::star;
				273	case '\\': return lltok::backslash;
				274	}
				275	}
				276
				277	void LLLexer::SkipLineComment() {
				278	while (1) {
				279	if (CurPtr[0] == '\n' \|\| CurPtr[0] == '\r' \|\| getNextChar() == EOF)
				280	return;
				281	}
				282	}
				283
				284	/// LexAt - Lex all tokens that start with an @ character:
				285	/// GlobalVar @\"[^\"]*\"
				286	/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
				287	/// GlobalVarID @[0-9]+
				288	lltok::Kind LLLexer::LexAt() {
				289	// Handle AtStringConstant: @\"[^\"]*\"
				290	if (CurPtr[0] == '"') {
				291	++CurPtr;
				292
				293	while (1) {
				294	int CurChar = getNextChar();
				295
				296	if (CurChar == EOF) {
				297	Error("end of file in global variable name");
				298	return lltok::Error;
				299	}
				300	if (CurChar == '"') {
				301	StrVal.assign(TokStart+2, CurPtr-1);
				302	UnEscapeLexed(StrVal);
				303	return lltok::GlobalVar;
				304	}
				305	}
				306	}
				307
				308	// Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
				309	if (isalpha(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				310	CurPtr[0] == '.' \|\| CurPtr[0] == '_') {
				311	++CurPtr;
				312	while (isalnum(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				313	CurPtr[0] == '.' \|\| CurPtr[0] == '_')
				314	++CurPtr;
				315
				316	StrVal.assign(TokStart+1, CurPtr); // Skip @
				317	return lltok::GlobalVar;
				318	}
				319
				320	// Handle GlobalVarID: @[0-9]+
				321	if (isdigit(CurPtr[0])) {
				322	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
				323	/empty/;
				324
				325	uint64_t Val = atoull(TokStart+1, CurPtr);
				326	if ((unsigned)Val != Val)
				327	Error("invalid value number (too large)!");
				328	UIntVal = unsigned(Val);
				329	return lltok::GlobalID;
				330	}
				331
				332	return lltok::Error;
				333	}
				334
				335
				336	/// LexPercent - Lex all tokens that start with a % character:
				337	/// LocalVar ::= %\"[^\"]*\"
				338	/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
				339	/// LocalVarID ::= %[0-9]+
				340	lltok::Kind LLLexer::LexPercent() {
				341	// Handle LocalVarName: %\"[^\"]*\"
				342	if (CurPtr[0] == '"') {
				343	++CurPtr;
				344
				345	while (1) {
				346	int CurChar = getNextChar();
				347
				348	if (CurChar == EOF) {
				349	Error("end of file in string constant");
				350	return lltok::Error;
				351	}
				352	if (CurChar == '"') {
				353	StrVal.assign(TokStart+2, CurPtr-1);
				354	UnEscapeLexed(StrVal);
				355	return lltok::LocalVar;
				356	}
				357	}
				358	}
				359
				360	// Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
				361	if (isalpha(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				362	CurPtr[0] == '.' \|\| CurPtr[0] == '_') {
				363	++CurPtr;
				364	while (isalnum(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				365	CurPtr[0] == '.' \|\| CurPtr[0] == '_')
				366	++CurPtr;
				367
				368	StrVal.assign(TokStart+1, CurPtr); // Skip %
				369	return lltok::LocalVar;
				370	}
				371
				372	// Handle LocalVarID: %[0-9]+
				373	if (isdigit(CurPtr[0])) {
				374	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
				375	/empty/;
				376
				377	uint64_t Val = atoull(TokStart+1, CurPtr);
				378	if ((unsigned)Val != Val)
				379	Error("invalid value number (too large)!");
				380	UIntVal = unsigned(Val);
				381	return lltok::LocalVarID;
				382	}
				383
				384	return lltok::Error;
				385	}
				386
				387	/// LexQuote - Lex all tokens that start with a " character:
				388	/// QuoteLabel "[^"]+":
				389	/// StringConstant "[^"]*"
				390	lltok::Kind LLLexer::LexQuote() {
				391	while (1) {
				392	int CurChar = getNextChar();
				393
				394	if (CurChar == EOF) {
				395	Error("end of file in quoted string");
				396	return lltok::Error;
				397	}
				398
				399	if (CurChar != '"') continue;
				400
				401	if (CurPtr[0] != ':') {
				402	StrVal.assign(TokStart+1, CurPtr-1);
				403	UnEscapeLexed(StrVal);
				404	return lltok::StringConstant;
				405	}
				406
				407	++CurPtr;
				408	StrVal.assign(TokStart+1, CurPtr-2);
				409	UnEscapeLexed(StrVal);
				410	return lltok::LabelStr;
				411	}
				412	}
				413
				414	static bool JustWhitespaceNewLine(const char *&Ptr) {
				415	const char *ThisPtr = Ptr;
				416	while (ThisPtr == ' ' \|\| ThisPtr == '\t')
				417	++ThisPtr;
				418	if (ThisPtr == '\n' \|\| ThisPtr == '\r') {
				419	Ptr = ThisPtr;
				420	return true;
				421	}
				422	return false;
				423	}
				424
				425	/// LexExclaim:
				426	/// !foo
				427	/// !
				428	lltok::Kind LLLexer::LexExclaim() {
				429	// Lex a metadata name as a MetadataVar.
				430	if (isalpha(CurPtr[0])) {
				431	++CurPtr;
				432	while (isalnum(CurPtr[0]) \|\| CurPtr[0] == '-' \|\| CurPtr[0] == '$' \|\|
				433	CurPtr[0] == '.' \|\| CurPtr[0] == '_')
				434	++CurPtr;
				435
				436	StrVal.assign(TokStart+1, CurPtr); // Skip !
				437	return lltok::MetadataVar;
				438	}
				439	return lltok::exclaim;
				440	}
				441
				442	/// LexIdentifier: Handle several related productions:
				443	/// Label [-a-zA-Z$._0-9]+:
				444	/// IntegerType i[0-9]+
				445	/// Keyword sdiv, float, ...
				446	/// HexIntConstant [us]0x[0-9A-Fa-f]+
				447	lltok::Kind LLLexer::LexIdentifier() {
				448	const char *StartChar = CurPtr;
				449	const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
				450	const char *KeywordEnd = 0;
				451
				452	for (; isLabelChar(*CurPtr); ++CurPtr) {
				453	// If we decide this is an integer, remember the end of the sequence.
				454	if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
				455	if (!KeywordEnd && !isalnum(CurPtr) && CurPtr != '_') KeywordEnd = CurPtr;
				456	}
				457
				458	// If we stopped due to a colon, this really is a label.
				459	if (*CurPtr == ':') {
				460	StrVal.assign(StartChar-1, CurPtr++);
				461	return lltok::LabelStr;
				462	}
				463
				464	// Otherwise, this wasn't a label. If this was valid as an integer type,
				465	// return it.
				466	if (IntEnd == 0) IntEnd = CurPtr;
				467	if (IntEnd != StartChar) {
				468	CurPtr = IntEnd;
				469	uint64_t NumBits = atoull(StartChar, CurPtr);
				470	if (NumBits < IntegerType::MIN_INT_BITS \|\|
				471	NumBits > IntegerType::MAX_INT_BITS) {
				472	Error("bitwidth for integer type out of range!");
				473	return lltok::Error;
				474	}
				475	TyVal = IntegerType::get(Context, NumBits);
				476	return lltok::Type;
				477	}
				478
				479	// Otherwise, this was a letter sequence. See which keyword this is.
				480	if (KeywordEnd == 0) KeywordEnd = CurPtr;
				481	CurPtr = KeywordEnd;
				482	--StartChar;
				483	unsigned Len = CurPtr-StartChar;
				484	#define KEYWORD(STR) \
				485	if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
				486	return lltok::kw_##STR;
				487
				488	KEYWORD(begin); KEYWORD(end);
				489	KEYWORD(true); KEYWORD(false);
				490	KEYWORD(declare); KEYWORD(define);
				491	KEYWORD(global); KEYWORD(constant);
				492
				493	KEYWORD(private);
				494	KEYWORD(linker_private);
				495	KEYWORD(internal);
				496	KEYWORD(available_externally);
				497	KEYWORD(linkonce);
				498	KEYWORD(linkonce_odr);
				499	KEYWORD(weak);
				500	KEYWORD(weak_odr);
				501	KEYWORD(appending);
				502	KEYWORD(dllimport);
				503	KEYWORD(dllexport);
				504	KEYWORD(common);
				505	KEYWORD(default);
				506	KEYWORD(hidden);
				507	KEYWORD(protected);
				508	KEYWORD(extern_weak);
				509	KEYWORD(external);
				510	KEYWORD(thread_local);
				511	KEYWORD(zeroinitializer);
				512	KEYWORD(undef);
				513	KEYWORD(null);
				514	KEYWORD(to);
				515	KEYWORD(tail);
				516	KEYWORD(target);
				517	KEYWORD(triple);
				518	KEYWORD(deplibs);
				519	KEYWORD(datalayout);
				520	KEYWORD(volatile);
				521	KEYWORD(nuw);
				522	KEYWORD(nsw);
				523	KEYWORD(exact);
				524	KEYWORD(inbounds);
				525	KEYWORD(align);
				526	KEYWORD(addrspace);
				527	KEYWORD(section);
				528	KEYWORD(alias);
				529	KEYWORD(module);
				530	KEYWORD(asm);
				531	KEYWORD(sideeffect);
				532	KEYWORD(alignstack);
				533	KEYWORD(gc);
				534
				535	KEYWORD(ccc);
				536	KEYWORD(fastcc);
				537	KEYWORD(coldcc);
				538	KEYWORD(x86_stdcallcc);
				539	KEYWORD(x86_fastcallcc);
				540	KEYWORD(arm_apcscc);
				541	KEYWORD(arm_aapcscc);
				542	KEYWORD(arm_aapcs_vfpcc);
				543	KEYWORD(msp430_intrcc);
				544
				545	KEYWORD(cc);
				546	KEYWORD(c);
				547
				548	KEYWORD(signext);
				549	KEYWORD(zeroext);
				550	KEYWORD(inreg);
				551	KEYWORD(sret);
				552	KEYWORD(nounwind);
				553	KEYWORD(noreturn);
				554	KEYWORD(noalias);
				555	KEYWORD(nocapture);
				556	KEYWORD(byval);
				557	KEYWORD(nest);
				558	KEYWORD(readnone);
				559	KEYWORD(readonly);
				560
				561	KEYWORD(inlinehint);
				562	KEYWORD(noinline);
				563	KEYWORD(alwaysinline);
				564	KEYWORD(optsize);
				565	KEYWORD(ssp);
				566	KEYWORD(sspreq);
				567	KEYWORD(noredzone);
				568	KEYWORD(noimplicitfloat);
				569	KEYWORD(naked);
				570
				571	KEYWORD(type);
				572	KEYWORD(opaque);
				573
				574	KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
				575	KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
				576	KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
				577	KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
				578
				579	KEYWORD(x);
				580	KEYWORD(blockaddress);
				581	#undef KEYWORD
				582
				583	// Keywords for types.
				584	#define TYPEKEYWORD(STR, LLVMTY) \
				585	if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
				586	TyVal = LLVMTY; return lltok::Type; }
				587	TYPEKEYWORD("void", Type::getVoidTy(Context));
				588	TYPEKEYWORD("float", Type::getFloatTy(Context));
				589	TYPEKEYWORD("double", Type::getDoubleTy(Context));
				590	TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
				591	TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
				592	TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
				593	TYPEKEYWORD("label", Type::getLabelTy(Context));
				594	TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
				595	#undef TYPEKEYWORD
				596
				597	// Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
				598	// to avoid conflicting with the sext/zext instructions, below.
				599	if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
				600	// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
				601	if (JustWhitespaceNewLine(CurPtr))
				602	return lltok::kw_signext;
				603	} else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
				604	// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
				605	if (JustWhitespaceNewLine(CurPtr))
				606	return lltok::kw_zeroext;
				607	} else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
				608	// FIXME: Remove in LLVM 3.0.
				609	// Autoupgrade malloc instruction.
				610	return lltok::kw_malloc;
				611	} else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
				612	// FIXME: Remove in LLVM 3.0.
				613	// Autoupgrade malloc instruction.
				614	return lltok::kw_free;
				615	}
				616
				617	// Keywords for instructions.
				618	#define INSTKEYWORD(STR, Enum) \
				619	if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
				620	UIntVal = Instruction::Enum; return lltok::kw_##STR; }
				621
				622	INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
				623	INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
				624	INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
				625	INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
				626	INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
				627	INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
				628	INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
				629	INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
				630
				631	INSTKEYWORD(phi, PHI);
				632	INSTKEYWORD(call, Call);
				633	INSTKEYWORD(trunc, Trunc);
				634	INSTKEYWORD(zext, ZExt);
				635	INSTKEYWORD(sext, SExt);
				636	INSTKEYWORD(fptrunc, FPTrunc);
				637	INSTKEYWORD(fpext, FPExt);
				638	INSTKEYWORD(uitofp, UIToFP);
				639	INSTKEYWORD(sitofp, SIToFP);
				640	INSTKEYWORD(fptoui, FPToUI);
				641	INSTKEYWORD(fptosi, FPToSI);
				642	INSTKEYWORD(inttoptr, IntToPtr);
				643	INSTKEYWORD(ptrtoint, PtrToInt);
				644	INSTKEYWORD(bitcast, BitCast);
				645	INSTKEYWORD(select, Select);
				646	INSTKEYWORD(va_arg, VAArg);
				647	INSTKEYWORD(ret, Ret);
				648	INSTKEYWORD(br, Br);
				649	INSTKEYWORD(switch, Switch);
				650	INSTKEYWORD(indirectbr, IndirectBr);
				651	INSTKEYWORD(invoke, Invoke);
				652	INSTKEYWORD(unwind, Unwind);
				653	INSTKEYWORD(unreachable, Unreachable);
				654
				655	INSTKEYWORD(alloca, Alloca);
				656	INSTKEYWORD(load, Load);
				657	INSTKEYWORD(store, Store);
				658	INSTKEYWORD(getelementptr, GetElementPtr);
				659
				660	INSTKEYWORD(extractelement, ExtractElement);
				661	INSTKEYWORD(insertelement, InsertElement);
				662	INSTKEYWORD(shufflevector, ShuffleVector);
				663	INSTKEYWORD(getresult, ExtractValue);
				664	INSTKEYWORD(extractvalue, ExtractValue);
				665	INSTKEYWORD(insertvalue, InsertValue);
				666	#undef INSTKEYWORD
				667
				668	// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
				669	// the CFE to avoid forcing it to deal with 64-bit numbers.
				670	if ((TokStart[0] == 'u' \|\| TokStart[0] == 's') &&
				671	TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
				672	int len = CurPtr-TokStart-3;
				673	uint32_t bits = len * 4;
				674	APInt Tmp(bits, StringRef(TokStart+3, len), 16);
				675	uint32_t activeBits = Tmp.getActiveBits();
				676	if (activeBits > 0 && activeBits < bits)
				677	Tmp.trunc(activeBits);
				678	APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
				679	return lltok::APSInt;
				680	}
				681
				682	// If this is "cc1234", return this as just "cc".
				683	if (TokStart[0] == 'c' && TokStart[1] == 'c') {
				684	CurPtr = TokStart+2;
				685	return lltok::kw_cc;
				686	}
				687
				688	// If this starts with "call", return it as CALL. This is to support old
				689	// broken .ll files. FIXME: remove this with LLVM 3.0.
				690	if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
				691	CurPtr = TokStart+4;
				692	UIntVal = Instruction::Call;
				693	return lltok::kw_call;
				694	}
				695
				696	// Finally, if this isn't known, return an error.
				697	CurPtr = TokStart+1;
				698	return lltok::Error;
				699	}
				700
				701
				702	/// Lex0x: Handle productions that start with 0x, knowing that it matches and
				703	/// that this is not a label:
				704	/// HexFPConstant 0x[0-9A-Fa-f]+
				705	/// HexFP80Constant 0xK[0-9A-Fa-f]+
				706	/// HexFP128Constant 0xL[0-9A-Fa-f]+
				707	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
				708	lltok::Kind LLLexer::Lex0x() {
				709	CurPtr = TokStart + 2;
				710
				711	char Kind;
				712	if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
				713	Kind = *CurPtr++;
				714	} else {
				715	Kind = 'J';
				716	}
				717
				718	if (!isxdigit(CurPtr[0])) {
				719	// Bad token, return it as an error.
				720	CurPtr = TokStart+1;
				721	return lltok::Error;
				722	}
				723
				724	while (isxdigit(CurPtr[0]))
				725	++CurPtr;
				726
				727	if (Kind == 'J') {
				728	// HexFPConstant - Floating point constant represented in IEEE format as a
				729	// hexadecimal number for when exponential notation is not precise enough.
				730	// Float and double only.
				731	APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
				732	return lltok::APFloat;
				733	}
				734
				735	uint64_t Pair[2];
				736	switch (Kind) {
				737	default: llvm_unreachable("Unknown kind!");
				738	case 'K':
				739	// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
				740	FP80HexToIntPair(TokStart+3, CurPtr, Pair);
				741	APFloatVal = APFloat(APInt(80, 2, Pair));
				742	return lltok::APFloat;
				743	case 'L':
				744	// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
				745	HexToIntPair(TokStart+3, CurPtr, Pair);
				746	APFloatVal = APFloat(APInt(128, 2, Pair), true);
				747	return lltok::APFloat;
				748	case 'M':
				749	// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
				750	HexToIntPair(TokStart+3, CurPtr, Pair);
				751	APFloatVal = APFloat(APInt(128, 2, Pair));
				752	return lltok::APFloat;
				753	}
				754	}
				755
				756	/// LexIdentifier: Handle several related productions:
				757	/// Label [-a-zA-Z$._0-9]+:
				758	/// NInteger -[0-9]+
				759	/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
				760	/// PInteger [0-9]+
				761	/// HexFPConstant 0x[0-9A-Fa-f]+
				762	/// HexFP80Constant 0xK[0-9A-Fa-f]+
				763	/// HexFP128Constant 0xL[0-9A-Fa-f]+
				764	/// HexPPC128Constant 0xM[0-9A-Fa-f]+
				765	lltok::Kind LLLexer::LexDigitOrNegative() {
				766	// If the letter after the negative is a number, this is probably a label.
				767	if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
				768	// Okay, this is not a number after the -, it's probably a label.
				769	if (const char *End = isLabelTail(CurPtr)) {
				770	StrVal.assign(TokStart, End-1);
				771	CurPtr = End;
				772	return lltok::LabelStr;
				773	}
				774
				775	return lltok::Error;
				776	}
				777
				778	// At this point, it is either a label, int or fp constant.
				779
				780	// Skip digits, we have at least one.
				781	for (; isdigit(CurPtr[0]); ++CurPtr)
				782	/empty/;
				783
				784	// Check to see if this really is a label afterall, e.g. "-1:".
				785	if (isLabelChar(CurPtr[0]) \|\| CurPtr[0] == ':') {
				786	if (const char *End = isLabelTail(CurPtr)) {
				787	StrVal.assign(TokStart, End-1);
				788	CurPtr = End;
				789	return lltok::LabelStr;
				790	}
				791	}
				792
				793	// If the next character is a '.', then it is a fp value, otherwise its
				794	// integer.
				795	if (CurPtr[0] != '.') {
				796	if (TokStart[0] == '0' && TokStart[1] == 'x')
				797	return Lex0x();
				798	unsigned Len = CurPtr-TokStart;
				799	uint32_t numBits = ((Len * 64) / 19) + 2;
				800	APInt Tmp(numBits, StringRef(TokStart, Len), 10);
				801	if (TokStart[0] == '-') {
				802	uint32_t minBits = Tmp.getMinSignedBits();
				803	if (minBits > 0 && minBits < numBits)
				804	Tmp.trunc(minBits);
				805	APSIntVal = APSInt(Tmp, false);
				806	} else {
				807	uint32_t activeBits = Tmp.getActiveBits();
				808	if (activeBits > 0 && activeBits < numBits)
				809	Tmp.trunc(activeBits);
				810	APSIntVal = APSInt(Tmp, true);
				811	}
				812	return lltok::APSInt;
				813	}
				814
				815	++CurPtr;
				816
				817	// Skip over [0-9]*([eE][-+]?[0-9]+)?
				818	while (isdigit(CurPtr[0])) ++CurPtr;
				819
				820	if (CurPtr[0] == 'e' \|\| CurPtr[0] == 'E') {
				821	if (isdigit(CurPtr[1]) \|\|
				822	((CurPtr[1] == '-' \|\| CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
				823	CurPtr += 2;
				824	while (isdigit(CurPtr[0])) ++CurPtr;
				825	}
				826	}
				827
				828	APFloatVal = APFloat(atof(TokStart));
				829	return lltok::APFloat;
				830	}
				831
				832	/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
				833	lltok::Kind LLLexer::LexPositive() {
				834	// If the letter after the negative is a number, this is probably not a
				835	// label.
				836	if (!isdigit(CurPtr[0]))
				837	return lltok::Error;
				838
				839	// Skip digits.
				840	for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
				841	/empty/;
				842
				843	// At this point, we need a '.'.
				844	if (CurPtr[0] != '.') {
				845	CurPtr = TokStart+1;
				846	return lltok::Error;
				847	}
				848
				849	++CurPtr;
				850
				851	// Skip over [0-9]*([eE][-+]?[0-9]+)?
				852	while (isdigit(CurPtr[0])) ++CurPtr;
				853
				854	if (CurPtr[0] == 'e' \|\| CurPtr[0] == 'E') {
				855	if (isdigit(CurPtr[1]) \|\|
				856	((CurPtr[1] == '-' \|\| CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
				857	CurPtr += 2;
				858	while (isdigit(CurPtr[0])) ++CurPtr;
				859	}
				860	}
				861
				862	APFloatVal = APFloat(atof(TokStart));
				863	return lltok::APFloat;
				864	}