Blame - clang/Lex/Lexer.cpp - toolchain/llvm-project

blob: cd514e3308de0bc4270ec8786ac8baab3324bfe8 [file] [log] [blame]

Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1	//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the Lexer and LexerToken interfaces.
				11	//
				12	//===----------------------------------------------------------------------===//
				13	//
				14	// TODO: GCC Diagnostics emitted by the lexer:
				15	// PEDWARN: (form feed|vertical tab) in preprocessing directive
				16	//
				17	// Universal characters, unicode, char mapping:
				18	// WARNING: `%.*s' is not in NFKC
				19	// WARNING: `%.*s' is not in NFC
				20	//
				21	// Other:
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	22	// TODO: Options to support:
				23	// -fexec-charset,-fwide-exec-charset
				24	//
				25	//===----------------------------------------------------------------------===//
				26
				27	#include "clang/Lex/Lexer.h"
				28	#include "clang/Lex/Preprocessor.h"
				29	#include "clang/Basic/Diagnostic.h"
				30	#include "clang/Basic/SourceBuffer.h"
				31	#include "clang/Basic/SourceLocation.h"
				32	#include "llvm/Config/alloca.h"
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	33	#include <cctype>
				34	#include <iostream>
				35	using namespace llvm;
				36	using namespace clang;
				37
				38	static void InitCharacterInfo();
				39
				40	Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp)
				41	: BufferPtr(File->getBufferStart()), BufferStart(BufferPtr),
				42	BufferEnd(File->getBufferEnd()), InputFile(File), CurFileID(fileid), PP(pp),
				43	Features(PP.getLangOptions()) {
				44	InitCharacterInfo();
				45
				46	assert(BufferEnd[0] == 0 &&
				47	"We assume that the input buffer has a null character at the end"
				48	" to simplify lexing!");
				49
				50	// Start of the file is a start of line.
				51	IsAtStartOfLine = true;
				52
				53	// We are not after parsing a #.
				54	ParsingPreprocessorDirective = false;
				55
				56	// We are not after parsing #include.
				57	ParsingFilename = false;
				58	}
				59
				60	//===----------------------------------------------------------------------===//
				61	// LexerToken implementation.
				62	//===----------------------------------------------------------------------===//
				63
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	64	//===----------------------------------------------------------------------===//
				65	// Character information.
				66	//===----------------------------------------------------------------------===//
				67
				68	static unsigned char CharInfo[256];
				69
				70	enum {
				71	CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
				72	CHAR_VERT_WS = 0x02, // '\r', '\n'
				73	CHAR_LETTER = 0x04, // a-z,A-Z
				74	CHAR_NUMBER = 0x08, // 0-9
				75	CHAR_UNDER = 0x10, // _
				76	CHAR_PERIOD = 0x20 // .
				77	};
				78
				79	static void InitCharacterInfo() {
				80	static bool isInited = false;
				81	if (isInited) return;
				82	isInited = true;
				83
				84	// Intiialize the CharInfo table.
				85	// TODO: statically initialize this.
				86	CharInfo[(int)' '] = CharInfo[(int)'\t'] =
				87	CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
				88	CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
				89
				90	CharInfo[(int)'_'] = CHAR_UNDER;
				91	for (unsigned i = 'a'; i <= 'z'; ++i)
				92	CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
				93	for (unsigned i = '0'; i <= '9'; ++i)
				94	CharInfo[i] = CHAR_NUMBER;
				95	}
				96
				97	/// isIdentifierBody - Return true if this is the body character of an
				98	/// identifier, which is [a-zA-Z0-9_].
				99	static inline bool isIdentifierBody(unsigned char c) {
				100	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER);
				101	}
				102
				103	/// isHorizontalWhitespace - Return true if this character is horizontal
				104	/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
				105	static inline bool isHorizontalWhitespace(unsigned char c) {
				106	return CharInfo[c] & CHAR_HORZ_WS;
				107	}
				108
				109	/// isWhitespace - Return true if this character is horizontal or vertical
				110	/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
				111	/// for '\0'.
				112	static inline bool isWhitespace(unsigned char c) {
				113	return CharInfo[c] & (CHAR_HORZ_WS\|CHAR_VERT_WS);
				114	}
				115
				116	/// isNumberBody - Return true if this is the body character of an
				117	/// preprocessing number, which is [a-zA-Z0-9_.].
				118	static inline bool isNumberBody(unsigned char c) {
				119	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER\|CHAR_PERIOD);
				120	}
				121
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	122
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	123	//===----------------------------------------------------------------------===//
				124	// Diagnostics forwarding code.
				125	//===----------------------------------------------------------------------===//
				126
				127	/// getSourceLocation - Return a source location identifier for the specified
				128	/// offset in the current file.
				129	SourceLocation Lexer::getSourceLocation(const char *Loc) const {
				130	assert(Loc >= InputFile->getBufferStart() && Loc <= InputFile->getBufferEnd()
				131	&& "Location out of range for this buffer!");
				132	return SourceLocation(CurFileID, Loc-InputFile->getBufferStart());
				133	}
				134
				135
				136	/// Diag - Forwarding function for diagnostics. This translate a source
				137	/// position in the current buffer into a SourceLocation object for rendering.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	138	void Lexer::Diag(const char *Loc, unsigned DiagID,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	139	const std::string &Msg) const {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	140	PP.Diag(getSourceLocation(Loc), DiagID, Msg);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	141	}
				142
				143	//===----------------------------------------------------------------------===//
				144	// Trigraph and Escaped Newline Handling Code.
				145	//===----------------------------------------------------------------------===//
				146
				147	/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
				148	/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
				149	static char GetTrigraphCharForLetter(char Letter) {
				150	switch (Letter) {
				151	default: return 0;
				152	case '=': return '#';
				153	case ')': return ']';
				154	case '(': return '[';
				155	case '!': return '\|';
				156	case '\'': return '^';
				157	case '>': return '}';
				158	case '/': return '\\';
				159	case '<': return '{';
				160	case '-': return '~';
				161	}
				162	}
				163
				164	/// DecodeTrigraphChar - If the specified character is a legal trigraph when
				165	/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled,
				166	/// return the result character. Finally, emit a warning about trigraph use
				167	/// whether trigraphs are enabled or not.
				168	static char DecodeTrigraphChar(const char CP, Lexer L) {
				169	char Res = GetTrigraphCharForLetter(*CP);
				170	if (Res && L) {
				171	if (!L->getFeatures().Trigraphs) {
				172	L->Diag(CP-2, diag::trigraph_ignored);
				173	return 0;
				174	} else {
				175	L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
				176	}
				177	}
				178	return Res;
				179	}
				180
				181	/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
				182	/// get its size, and return it. This is tricky in several cases:
				183	/// 1. If currently at the start of a trigraph, we warn about the trigraph,
				184	/// then either return the trigraph (skipping 3 chars) or the '?',
				185	/// depending on whether trigraphs are enabled or not.
				186	/// 2. If this is an escaped newline (potentially with whitespace between
				187	/// the backslash and newline), implicitly skip the newline and return
				188	/// the char after it.
				189	/// 3. If this is a UCN, return it. FIXME: for C++?
				190	///
				191	/// This handles the slow/uncommon case of the getCharAndSize method. Here we
				192	/// know that we can accumulate into Size, and that we have already incremented
				193	/// Ptr by Size bytes.
				194	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	195	/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
				196	/// be updated to match.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	197	///
				198	char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
				199	LexerToken *Tok) {
				200	// If we have a slash, look for an escaped newline.
				201	if (Ptr[0] == '\\') {
				202	++Size;
				203	++Ptr;
				204	Slash:
				205	// Common case, backslash-char where the char is not whitespace.
				206	if (!isWhitespace(Ptr[0])) return '\\';
				207
				208	// See if we have optional whitespace characters followed by a newline.
				209	{
				210	unsigned SizeTmp = 0;
				211	do {
				212	++SizeTmp;
				213	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				214	// Remember that this token needs to be cleaned.
				215	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				216
				217	// Warn if there was whitespace between the backslash and newline.
				218	if (SizeTmp != 1 && Tok)
				219	Diag(Ptr, diag::backslash_newline_space);
				220
				221	// If this is a \r\n or \n\r, skip the newlines.
				222	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				223	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				224	++SizeTmp;
				225
				226	// Found backslash<whitespace><newline>. Parse the char after it.
				227	Size += SizeTmp;
				228	Ptr += SizeTmp;
				229	// Use slow version to accumulate a correct size field.
				230	return getCharAndSizeSlow(Ptr, Size, Tok);
				231	}
				232	} while (isWhitespace(Ptr[SizeTmp]));
				233	}
				234
				235	// Otherwise, this is not an escaped newline, just return the slash.
				236	return '\\';
				237	}
				238
				239	// If this is a trigraph, process it.
				240	if (Ptr[0] == '?' && Ptr[1] == '?') {
				241	// If this is actually a legal trigraph (not something like "??x"), emit
				242	// a trigraph warning. If so, and if trigraphs are enabled, return it.
				243	if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
				244	// Remember that this token needs to be cleaned.
				245	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				246
				247	Ptr += 3;
				248	Size += 3;
				249	if (C == '\\') goto Slash;
				250	return C;
				251	}
				252	}
				253
				254	// If this is neither, return a single character.
				255	++Size;
				256	return *Ptr;
				257	}
				258
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	259
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	260	/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
				261	/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
				262	/// and that we have already incremented Ptr by Size bytes.
				263	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	264	/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
				265	/// be updated to match.
				266	char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	267	const LangOptions &Features) {
				268	// If we have a slash, look for an escaped newline.
				269	if (Ptr[0] == '\\') {
				270	++Size;
				271	++Ptr;
				272	Slash:
				273	// Common case, backslash-char where the char is not whitespace.
				274	if (!isWhitespace(Ptr[0])) return '\\';
				275
				276	// See if we have optional whitespace characters followed by a newline.
				277	{
				278	unsigned SizeTmp = 0;
				279	do {
				280	++SizeTmp;
				281	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				282
				283	// If this is a \r\n or \n\r, skip the newlines.
				284	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				285	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				286	++SizeTmp;
				287
				288	// Found backslash<whitespace><newline>. Parse the char after it.
				289	Size += SizeTmp;
				290	Ptr += SizeTmp;
				291
				292	// Use slow version to accumulate a correct size field.
				293	return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
				294	}
				295	} while (isWhitespace(Ptr[SizeTmp]));
				296	}
				297
				298	// Otherwise, this is not an escaped newline, just return the slash.
				299	return '\\';
				300	}
				301
				302	// If this is a trigraph, process it.
				303	if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
				304	// If this is actually a legal trigraph (not something like "??x"), return
				305	// it.
				306	if (char C = GetTrigraphCharForLetter(Ptr[2])) {
				307	Ptr += 3;
				308	Size += 3;
				309	if (C == '\\') goto Slash;
				310	return C;
				311	}
				312	}
				313
				314	// If this is neither, return a single character.
				315	++Size;
				316	return *Ptr;
				317	}
				318
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	319	//===----------------------------------------------------------------------===//
				320	// Helper methods for lexing.
				321	//===----------------------------------------------------------------------===//
				322
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	323	void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	324	// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
				325	unsigned Size;
				326	unsigned char C = *CurPtr++;
				327	while (isIdentifierBody(C)) {
				328	C = *CurPtr++;
				329	}
				330	--CurPtr; // Back up over the skipped character.
				331
				332	// Fast path, no $,\,? in identifier found. '\' might be an escaped newline
				333	// or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
				334	// FIXME: universal chars.
				335	if (C != '\\' && C != '?' && (C != '$' \|\| !Features.DollarIdents)) {
				336	FinishIdentifier:
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	337	const char IdStart = BufferPtr, IdEnd = CurPtr;
				338	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	339	Result.SetKind(tok::identifier);
				340
				341	// Look up this token, see if it is a macro, or if it is a language keyword.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	342	IdentifierTokenInfo *II;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	343	if (!Result.needsCleaning()) {
				344	// No cleaning needed, just use the characters from the lexed buffer.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	345	II = PP.getIdentifierInfo(IdStart, IdEnd);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	346	} else {
				347	// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
Chris Lattner	33ce728	2006-06-18 07:35:33 +0000	[diff] [blame]	348	char TmpBuf = (char)alloca(Result.getLength());
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	349	unsigned Size = PP.getSpelling(Result, TmpBuf);
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	350	II = PP.getIdentifierInfo(TmpBuf, TmpBuf+Size);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	351	}
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	352	Result.SetIdentifierInfo(II);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	353
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	354	// Finally, now that we know we have an identifier, pass this off to the
				355	// preprocessor, which may macro expand it or something.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	356	return PP.HandleIdentifier(Result);
				357	}
				358
				359	// Otherwise, $,\,? in identifier found. Enter slower path.
				360
				361	C = getCharAndSize(CurPtr, Size);
				362	while (1) {
				363	if (C == '$') {
				364	// If we hit a $ and they are not supported in identifiers, we are done.
				365	if (!Features.DollarIdents) goto FinishIdentifier;
				366
				367	// Otherwise, emit a diagnostic and continue.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	368	Diag(CurPtr, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	369	CurPtr = ConsumeChar(CurPtr, Size, Result);
				370	C = getCharAndSize(CurPtr, Size);
				371	continue;
				372	} else if (!isIdentifierBody(C)) { // FIXME: universal chars.
				373	// Found end of identifier.
				374	goto FinishIdentifier;
				375	}
				376
				377	// Otherwise, this character is good, consume it.
				378	CurPtr = ConsumeChar(CurPtr, Size, Result);
				379
				380	C = getCharAndSize(CurPtr, Size);
				381	while (isIdentifierBody(C)) { // FIXME: universal chars.
				382	CurPtr = ConsumeChar(CurPtr, Size, Result);
				383	C = getCharAndSize(CurPtr, Size);
				384	}
				385	}
				386	}
				387
				388
				389	/// LexNumericConstant - Lex the remainer of a integer or floating point
				390	/// constant. From[-1] is the first character lexed. Return the end of the
				391	/// constant.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	392	void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	393	unsigned Size;
				394	char C = getCharAndSize(CurPtr, Size);
				395	char PrevCh = 0;
				396	while (isNumberBody(C)) { // FIXME: universal chars?
				397	CurPtr = ConsumeChar(CurPtr, Size, Result);
				398	PrevCh = C;
				399	C = getCharAndSize(CurPtr, Size);
				400	}
				401
				402	// If we fell out, check for a sign, due to 1e+12. If we have one, continue.
				403	if ((C == '-' \|\| C == '+') && (PrevCh == 'E' \|\| PrevCh == 'e'))
				404	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				405
				406	// If we have a hex FP constant, continue.
				407	if (Features.HexFloats &&
				408	(C == '-' \|\| C == '+') && (PrevCh == 'P' \|\| PrevCh == 'p'))
				409	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				410
				411	Result.SetKind(tok::numeric_constant);
				412
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	413	// Update the location of token as well as BufferPtr.
				414	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	415	}
				416
				417	/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
				418	/// either " or L".
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	419	void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	420	const char *NulCharacter = 0; // Does this string contain the \0 character?
				421
				422	char C = getAndAdvanceChar(CurPtr, Result);
				423	while (C != '"') {
				424	// Skip escaped characters.
				425	if (C == '\\') {
				426	// Skip the escaped character.
				427	C = getAndAdvanceChar(CurPtr, Result);
				428	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				429	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	430	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	431	BufferPtr = CurPtr-1;
				432	return LexTokenInternal(Result);
				433	} else if (C == 0) {
				434	NulCharacter = CurPtr-1;
				435	}
				436	C = getAndAdvanceChar(CurPtr, Result);
				437	}
				438
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	439	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	440
				441	Result.SetKind(tok::string_literal);
				442
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	443	// Update the location of the token as well as the BufferPtr instance var.
				444	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	445	}
				446
				447	/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
				448	/// after having lexed the '<' character. This is used for #include filenames.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	449	void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	450	const char *NulCharacter = 0; // Does this string contain the \0 character?
				451
				452	char C = getAndAdvanceChar(CurPtr, Result);
				453	while (C != '>') {
				454	// Skip escaped characters.
				455	if (C == '\\') {
				456	// Skip the escaped character.
				457	C = getAndAdvanceChar(CurPtr, Result);
				458	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				459	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	460	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	461	BufferPtr = CurPtr-1;
				462	return LexTokenInternal(Result);
				463	} else if (C == 0) {
				464	NulCharacter = CurPtr-1;
				465	}
				466	C = getAndAdvanceChar(CurPtr, Result);
				467	}
				468
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	469	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	470
				471	Result.SetKind(tok::angle_string_literal);
				472
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	473	// Update the location of token as well as BufferPtr.
				474	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	475	}
				476
				477
				478	/// LexCharConstant - Lex the remainder of a character constant, after having
				479	/// lexed either ' or L'.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	480	void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	481	const char *NulCharacter = 0; // Does this character contain the \0 character?
				482
				483	// Handle the common case of 'x' and '\y' efficiently.
				484	char C = getAndAdvanceChar(CurPtr, Result);
				485	if (C == '\'') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	486	Diag(BufferPtr, diag::err_empty_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	487	BufferPtr = CurPtr;
				488	return LexTokenInternal(Result);
				489	} else if (C == '\\') {
				490	// Skip the escaped character.
				491	// FIXME: UCN's.
				492	C = getAndAdvanceChar(CurPtr, Result);
				493	}
				494
				495	if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
				496	++CurPtr;
				497	} else {
				498	// Fall back on generic code for embedded nulls, newlines, wide chars.
				499	do {
				500	// Skip escaped characters.
				501	if (C == '\\') {
				502	// Skip the escaped character.
				503	C = getAndAdvanceChar(CurPtr, Result);
				504	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				505	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	506	Diag(BufferPtr, diag::err_unterminated_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	507	BufferPtr = CurPtr-1;
				508	return LexTokenInternal(Result);
				509	} else if (C == 0) {
				510	NulCharacter = CurPtr-1;
				511	}
				512	C = getAndAdvanceChar(CurPtr, Result);
				513	} while (C != '\'');
				514	}
				515
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	516	if (NulCharacter) Diag(NulCharacter, diag::null_in_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	517
				518	Result.SetKind(tok::char_constant);
				519
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	520	// Update the location of token as well as BufferPtr.
				521	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	522	}
				523
				524	/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
				525	/// Update BufferPtr to point to the next non-whitespace character and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	526	void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	527	// Whitespace - Skip it, then return the token after the whitespace.
				528	unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently.
				529	while (1) {
				530	// Skip horizontal whitespace very aggressively.
				531	while (isHorizontalWhitespace(Char))
				532	Char = *++CurPtr;
				533
				534	// Otherwise if we something other than whitespace, we're done.
				535	if (Char != '\n' && Char != '\r')
				536	break;
				537
				538	if (ParsingPreprocessorDirective) {
				539	// End of preprocessor directive line, let LexTokenInternal handle this.
				540	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	541	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	542	}
				543
				544	// ok, but handle newline.
				545	// The returned token is at the start of the line.
				546	Result.SetFlag(LexerToken::StartOfLine);
				547	// No leading whitespace seen so far.
				548	Result.ClearFlag(LexerToken::LeadingSpace);
				549	Char = *++CurPtr;
				550	}
				551
				552	// If this isn't immediately after a newline, there is leading space.
				553	char PrevChar = CurPtr[-1];
				554	if (PrevChar != '\n' && PrevChar != '\r')
				555	Result.SetFlag(LexerToken::LeadingSpace);
				556
				557	// If the next token is obviously a // or /* */ comment, skip it efficiently
				558	// too (without going through the big switch stmt).
				559	if (Char == '/' && CurPtr[1] == '/') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	560	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	561	return SkipBCPLComment(Result, CurPtr+1);
				562	}
				563	if (Char == '/' && CurPtr[1] == '*') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	564	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	565	return SkipBlockComment(Result, CurPtr+2);
				566	}
				567	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	568	}
				569
				570	// SkipBCPLComment - We have just read the // characters from input. Skip until
				571	// we find the newline character thats terminate the comment. Then update
				572	/// BufferPtr and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	573	void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	574	// If BCPL comments aren't explicitly enabled for this language, emit an
				575	// extension warning.
				576	if (!Features.BCPLComment) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	577	Diag(BufferPtr, diag::ext_bcpl_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	578
				579	// Mark them enabled so we only emit one warning for this translation
				580	// unit.
				581	Features.BCPLComment = true;
				582	}
				583
				584	// Scan over the body of the comment. The common case, when scanning, is that
				585	// the comment contains normal ascii characters with nothing interesting in
				586	// them. As such, optimize for this case with the inner loop.
				587	char C;
				588	do {
				589	C = *CurPtr;
				590	// FIXME: just scan for a \n or \r character. If we find a \n character,
				591	// scan backwards, checking to see if it's an escaped newline, like we do
				592	// for block comments.
				593
				594	// Skip over characters in the fast loop.
				595	while (C != 0 && // Potentially EOF.
				596	C != '\\' && // Potentially escaped newline.
				597	C != '?' && // Potentially trigraph.
				598	C != '\n' && C != '\r') // Newline or DOS-style newline.
				599	C = *++CurPtr;
				600
				601	// If this is a newline, we're done.
				602	if (C == '\n' \|\| C == '\r')
				603	break; // Found the newline? Break out!
				604
				605	// Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
				606	// properly decode the character.
				607	const char *OldPtr = CurPtr;
				608	C = getAndAdvanceChar(CurPtr, Result);
				609
				610	// If we read multiple characters, and one of those characters was a \r or
				611	// \n, then we had an escaped newline within the comment. Emit diagnostic.
				612	if (CurPtr != OldPtr+1) {
				613	for (; OldPtr != CurPtr; ++OldPtr)
				614	if (OldPtr[0] == '\n' \|\| OldPtr[0] == '\r') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	615	Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
				616	break;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	617	}
				618	}
				619
				620	if (CurPtr == BufferEnd+1) goto FoundEOF;
				621	} while (C != '\n' && C != '\r');
				622
				623	// Found and did not consume a newline.
				624
				625	// If we are inside a preprocessor directive and we see the end of line,
				626	// return immediately, so that the lexer can return this as an EOM token.
				627	if (ParsingPreprocessorDirective) {
				628	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	629	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	630	}
				631
				632	// Otherwise, eat the \n character. We don't care if this is a \n\r or
				633	// \r\n sequence.
				634	++CurPtr;
				635
				636	// The next returned token is at the start of the line.
				637	Result.SetFlag(LexerToken::StartOfLine);
				638	// No leading whitespace seen so far.
				639	Result.ClearFlag(LexerToken::LeadingSpace);
				640
				641	// It is common for the tokens immediately after a // comment to be
				642	// whitespace (indentation for the next line). Instead of going through the
				643	// big switch, handle it efficiently now.
				644	if (isWhitespace(*CurPtr)) {
				645	Result.SetFlag(LexerToken::LeadingSpace);
				646	return SkipWhitespace(Result, CurPtr+1);
				647	}
				648
				649	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	650	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	651
				652	FoundEOF: // If we ran off the end of the buffer, return EOF.
				653	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	654	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	655	}
				656
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	657	/// isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline
				658	/// character (either \n or \r) is part of an escaped newline sequence. Issue a
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	659	/// diagnostic if so. We know that the newline is inside of a block comment.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	660	static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
				661	Lexer *L) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	662	assert(CurPtr[0] == '\n' \|\| CurPtr[0] == '\r');
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	663
				664	// Back up off the newline.
				665	--CurPtr;
				666
				667	// If this is a two-character newline sequence, skip the other character.
				668	if (CurPtr[0] == '\n' \|\| CurPtr[0] == '\r') {
				669	// \n\n or \r\r -> not escaped newline.
				670	if (CurPtr[0] == CurPtr[1])
				671	return false;
				672	// \n\r or \r\n -> skip the newline.
				673	--CurPtr;
				674	}
				675
				676	// If we have horizontal whitespace, skip over it. We allow whitespace
				677	// between the slash and newline.
				678	bool HasSpace = false;
				679	while (isHorizontalWhitespace(CurPtr) \|\| CurPtr == 0) {
				680	--CurPtr;
				681	HasSpace = true;
				682	}
				683
				684	// If we have a slash, we know this is an escaped newline.
				685	if (*CurPtr == '\\') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	686	if (CurPtr[-1] != '*') return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	687	} else {
				688	// It isn't a slash, is it the ?? / trigraph?
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	689	if (CurPtr[0] != '/' \|\| CurPtr[-1] != '?' \|\| CurPtr[-2] != '?' \|\|
				690	CurPtr[-3] != '*')
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	691	return false;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	692
				693	// This is the trigraph ending the comment. Emit a stern warning!
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	694	CurPtr -= 2;
				695
				696	// If no trigraphs are enabled, warn that we ignored this trigraph and
				697	// ignore this * character.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	698	if (!L->getFeatures().Trigraphs) {
				699	L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	700	return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	701	}
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	702	L->Diag(CurPtr, diag::trigraph_ends_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	703	}
				704
				705	// Warn about having an escaped newline between the */ characters.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	706	L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	707
				708	// If there was space between the backslash and newline, warn about it.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	709	if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	710
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	711	return true;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	712	}
				713
				714	/// SkipBlockComment - We have just read the /* characters from input. Read
				715	/// until we find the */ characters that terminate the comment. Note that we
				716	/// don't bother decoding trigraphs or escaped newlines in block comments,
				717	/// because they cannot cause the comment to end. The only thing that can
				718	/// happen is the comment could end with an escaped newline between the */ end
				719	/// of comment.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	720	void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	721	// Scan one character past where we should, looking for a '/' character. Once
				722	// we find it, check to see if it was preceeded by a *. This common
				723	// optimization helps people who like to put a lot of * characters in their
				724	// comments.
				725	unsigned char C = *CurPtr++;
				726	if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	727	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	728	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	729	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	730	}
				731
				732	while (1) {
				733	// Skip over all non-interesting characters.
				734	// TODO: Vectorize this. Note: memchr on Darwin is slower than this loop.
				735	while (C != '/' && C != '\0')
				736	C = *CurPtr++;
				737
				738	if (C == '/') {
				739	char T;
				740	if (CurPtr[-2] == '') // We found the final /. We're done!
				741	break;
				742
				743	if ((CurPtr[-2] == '\n' \|\| CurPtr[-2] == '\r')) {
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	744	if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	745	// We found the final */, though it had an escaped newline between the
				746	// * and /. We're done!
				747	break;
				748	}
				749	}
				750	if (CurPtr[0] == '*' && CurPtr[1] != '/') {
				751	// If this is a /* inside of the comment, emit a warning. Don't do this
				752	// if this is a /*/, which will end the comment. This misses cases with
				753	// embedded escaped newlines, but oh well.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	754	Diag(CurPtr-1, diag::nested_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	755	}
				756	} else if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	757	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	758	// Note: the user probably forgot a */. We could continue immediately
				759	// after the /*, but this would involve lexing a lot of what really is the
				760	// comment, which surely would confuse the parser.
				761	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	762	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	763	}
				764	C = *CurPtr++;
				765	}
				766
				767	// It is common for the tokens immediately after a /**/ comment to be
				768	// whitespace. Instead of going through the big switch, handle it
				769	// efficiently now.
				770	if (isHorizontalWhitespace(*CurPtr)) {
				771	Result.SetFlag(LexerToken::LeadingSpace);
				772	return SkipWhitespace(Result, CurPtr+1);
				773	}
				774
				775	// Otherwise, just return so that the next character will be lexed as a token.
				776	BufferPtr = CurPtr;
				777	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	778	}
				779
				780	//===----------------------------------------------------------------------===//
				781	// Primary Lexing Entry Points
				782	//===----------------------------------------------------------------------===//
				783
				784	/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
				785	/// (potentially) macro expand the filename.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	786	void Lexer::LexIncludeFilename(LexerToken &Result) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	787	assert(ParsingPreprocessorDirective &&
				788	ParsingFilename == false &&
				789	"Must be in a preprocessing directive!");
				790
				791	// We are now parsing a filename!
				792	ParsingFilename = true;
				793
				794	// There should be exactly two tokens here if everything is good: first the
				795	// filename, then the EOM.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	796	Lex(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	797
				798	// We should have gotten the filename now.
				799	ParsingFilename = false;
				800
				801	// No filename?
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	802	if (Result.getKind() == tok::eom) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	803	PP.Diag(Result, diag::err_pp_expects_filename);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	804	return;
				805	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	806
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	807	// Verify that there is nothing after the filename, other than EOM. Use the
				808	// preprocessor to lex this in case lexing the filename entered a macro.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	809	LexerToken EndTok;
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	810	PP.Lex(EndTok);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	811
				812	if (EndTok.getKind() != tok::eom) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	813	PP.Diag(EndTok, diag::ext_pp_extra_tokens_at_eol, "#include");
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	814
				815	// Lex until the end of the preprocessor directive line.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	816	while (EndTok.getKind() != tok::eom)
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	817	PP.Lex(EndTok);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	818
				819	Result.SetKind(tok::eom);
				820	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	821	}
				822
				823	/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
				824	/// uninterpreted string. This switches the lexer out of directive mode.
				825	std::string Lexer::ReadToEndOfLine() {
				826	assert(ParsingPreprocessorDirective && ParsingFilename == false &&
				827	"Must be in a preprocessing directive!");
				828	std::string Result;
				829	LexerToken Tmp;
				830
				831	// CurPtr - Cache BufferPtr in an automatic variable.
				832	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	833	while (1) {
				834	char Char = getAndAdvanceChar(CurPtr, Tmp);
				835	switch (Char) {
				836	default:
				837	Result += Char;
				838	break;
				839	case 0: // Null.
				840	// Found end of file?
				841	if (CurPtr-1 != BufferEnd) {
				842	// Nope, normal character, continue.
				843	Result += Char;
				844	break;
				845	}
				846	// FALL THROUGH.
				847	case '\r':
				848	case '\n':
				849	// Okay, we found the end of the line. First, back up past the \0, \r, \n.
				850	assert(CurPtr[-1] == Char && "Trigraphs for newline?");
				851	BufferPtr = CurPtr-1;
				852
				853	// Next, lex the character, which should handle the EOM transition.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	854	Lex(Tmp);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	855	assert(Tmp.getKind() == tok::eom && "Unexpected token!");
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	856
				857	// Finally, we're done, return the string we found.
				858	return Result;
				859	}
				860	}
				861	}
				862
				863	/// LexEndOfFile - CurPtr points to the end of this file. Handle this
				864	/// condition, reporting diagnostics and handling other edge cases as required.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	865	void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	866	// If we hit the end of the file while parsing a preprocessor directive,
				867	// end the preprocessor directive first. The next token returned will
				868	// then be the end of file.
				869	if (ParsingPreprocessorDirective) {
				870	// Done parsing the "line".
				871	ParsingPreprocessorDirective = false;
				872	Result.SetKind(tok::eom);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	873	// Update the location of token as well as BufferPtr.
				874	FormTokenWithChars(Result, CurPtr);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	875	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	876	}
				877
				878	// If we are in a #if directive, emit an error.
				879	while (!ConditionalStack.empty()) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	880	PP.Diag(ConditionalStack.back().IfLoc,
				881	diag::err_pp_unterminated_conditional);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	882	ConditionalStack.pop_back();
				883	}
				884
				885	// If the file was empty or didn't end in a newline, issue a pedwarn.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	886	if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
				887	Diag(BufferEnd, diag::ext_no_newline_eof);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	888
				889	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	890	PP.HandleEndOfFile(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	891	}
				892
				893
				894	/// LexTokenInternal - This implements a simple C family lexer. It is an
				895	/// extremely performance critical piece of code. This assumes that the buffer
				896	/// has a null character at the end of the file. Return true if an error
				897	/// occurred and compilation should terminate, false if normal. This returns a
				898	/// preprocessing token, not a normal token, as such, it is an internal
				899	/// interface. It assumes that the Flags of result have been cleared before
				900	/// calling this.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	901	void Lexer::LexTokenInternal(LexerToken &Result) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	902	LexNextToken:
				903	// New token, can't need cleaning yet.
				904	Result.ClearFlag(LexerToken::NeedsCleaning);
				905
				906	// CurPtr - Cache BufferPtr in an automatic variable.
				907	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	908
				909	unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
				910
				911	// Read a character, advancing over it.
				912	char Char = getAndAdvanceChar(CurPtr, Result);
				913	switch (Char) {
				914	case 0: // Null.
				915	// Found end of file?
				916	if (CurPtr-1 == BufferEnd)
				917	return LexEndOfFile(Result, CurPtr-1); // Retreat back into the file.
				918
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	919	Diag(CurPtr-1, diag::null_in_file);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	920	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	921	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	922	goto LexNextToken; // GCC isn't tail call eliminating.
				923	case '\n':
				924	case '\r':
				925	// If we are inside a preprocessor directive and we see the end of line,
				926	// we know we are done with the directive, so return an EOM token.
				927	if (ParsingPreprocessorDirective) {
				928	// Done parsing the "line".
				929	ParsingPreprocessorDirective = false;
				930
				931	// Since we consumed a newline, we are back at the start of a line.
				932	IsAtStartOfLine = true;
				933
				934	Result.SetKind(tok::eom);
				935	break;
				936	}
				937	// The returned token is at the start of the line.
				938	Result.SetFlag(LexerToken::StartOfLine);
				939	// No leading whitespace seen so far.
				940	Result.ClearFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	941	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	942	goto LexNextToken; // GCC isn't tail call eliminating.
				943	case ' ':
				944	case '\t':
				945	case '\f':
				946	case '\v':
				947	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	948	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	949	goto LexNextToken; // GCC isn't tail call eliminating.
				950
				951	case 'L':
				952	Char = getCharAndSize(CurPtr, SizeTmp);
				953
				954	// Wide string literal.
				955	if (Char == '"')
				956	return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				957
				958	// Wide character constant.
				959	if (Char == '\'')
				960	return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				961	// FALL THROUGH, treating L like the start of an identifier.
				962
				963	// C99 6.4.2: Identifiers.
				964	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
				965	case 'H': case 'I': case 'J': case 'K': /'L'/case 'M': case 'N':
				966	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
				967	case 'V': case 'W': case 'X': case 'Y': case 'Z':
				968	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
				969	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
				970	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
				971	case 'v': case 'w': case 'x': case 'y': case 'z':
				972	case '_':
				973	return LexIdentifier(Result, CurPtr);
				974
				975	// C99 6.4.4.1: Integer Constants.
				976	// C99 6.4.4.2: Floating Constants.
				977	case '0': case '1': case '2': case '3': case '4':
				978	case '5': case '6': case '7': case '8': case '9':
				979	return LexNumericConstant(Result, CurPtr);
				980
				981	// C99 6.4.4: Character Constants.
				982	case '\'':
				983	return LexCharConstant(Result, CurPtr);
				984
				985	// C99 6.4.5: String Literals.
				986	case '"':
				987	return LexStringLiteral(Result, CurPtr);
				988
				989	// C99 6.4.6: Punctuators.
				990	case '?':
				991	Result.SetKind(tok::question);
				992	break;
				993	case '[':
				994	Result.SetKind(tok::l_square);
				995	break;
				996	case ']':
				997	Result.SetKind(tok::r_square);
				998	break;
				999	case '(':
				1000	Result.SetKind(tok::l_paren);
				1001	break;
				1002	case ')':
				1003	Result.SetKind(tok::r_paren);
				1004	break;
				1005	case '{':
				1006	Result.SetKind(tok::l_brace);
				1007	break;
				1008	case '}':
				1009	Result.SetKind(tok::r_brace);
				1010	break;
				1011	case '.':
				1012	Char = getCharAndSize(CurPtr, SizeTmp);
				1013	if (Char >= '0' && Char <= '9') {
				1014	return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				1015	} else if (Features.CPlusPlus && Char == '*') {
				1016	Result.SetKind(tok::periodstar);
				1017	CurPtr += SizeTmp;
				1018	} else if (Char == '.' &&
				1019	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
				1020	Result.SetKind(tok::ellipsis);
				1021	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1022	SizeTmp2, Result);
				1023	} else {
				1024	Result.SetKind(tok::period);
				1025	}
				1026	break;
				1027	case '&':
				1028	Char = getCharAndSize(CurPtr, SizeTmp);
				1029	if (Char == '&') {
				1030	Result.SetKind(tok::ampamp);
				1031	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1032	} else if (Char == '=') {
				1033	Result.SetKind(tok::ampequal);
				1034	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1035	} else {
				1036	Result.SetKind(tok::amp);
				1037	}
				1038	break;
				1039	case '*':
				1040	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1041	Result.SetKind(tok::starequal);
				1042	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1043	} else {
				1044	Result.SetKind(tok::star);
				1045	}
				1046	break;
				1047	case '+':
				1048	Char = getCharAndSize(CurPtr, SizeTmp);
				1049	if (Char == '+') {
				1050	Result.SetKind(tok::plusplus);
				1051	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1052	} else if (Char == '=') {
				1053	Result.SetKind(tok::plusequal);
				1054	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1055	} else {
				1056	Result.SetKind(tok::plus);
				1057	}
				1058	break;
				1059	case '-':
				1060	Char = getCharAndSize(CurPtr, SizeTmp);
				1061	if (Char == '-') {
				1062	Result.SetKind(tok::minusminus);
				1063	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1064	} else if (Char == '>' && Features.CPlusPlus &&
				1065	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
				1066	Result.SetKind(tok::arrowstar); // C++ ->*
				1067	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1068	SizeTmp2, Result);
				1069	} else if (Char == '>') {
				1070	Result.SetKind(tok::arrow);
				1071	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1072	} else if (Char == '=') {
				1073	Result.SetKind(tok::minusequal);
				1074	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1075	} else {
				1076	Result.SetKind(tok::minus);
				1077	}
				1078	break;
				1079	case '~':
				1080	Result.SetKind(tok::tilde);
				1081	break;
				1082	case '!':
				1083	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1084	Result.SetKind(tok::exclaimequal);
				1085	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1086	} else {
				1087	Result.SetKind(tok::exclaim);
				1088	}
				1089	break;
				1090	case '/':
				1091	// 6.4.9: Comments
				1092	Char = getCharAndSize(CurPtr, SizeTmp);
				1093	if (Char == '/') { // BCPL comment.
				1094	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1095	SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1096	goto LexNextToken; // GCC isn't tail call eliminating.
				1097	} else if (Char == '') { // /*/ comment.
				1098	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1099	SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1100	goto LexNextToken; // GCC isn't tail call eliminating.
				1101	} else if (Char == '=') {
				1102	Result.SetKind(tok::slashequal);
				1103	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1104	} else {
				1105	Result.SetKind(tok::slash);
				1106	}
				1107	break;
				1108	case '%':
				1109	Char = getCharAndSize(CurPtr, SizeTmp);
				1110	if (Char == '=') {
				1111	Result.SetKind(tok::percentequal);
				1112	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1113	} else if (Features.Digraphs && Char == '>') {
				1114	Result.SetKind(tok::r_brace); // '%>' -> '}'
				1115	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1116	} else if (Features.Digraphs && Char == ':') {
				1117	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1118	if (getCharAndSize(CurPtr, SizeTmp) == '%' &&
				1119	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
				1120	Result.SetKind(tok::hashhash); // '%:%:' -> '##'
				1121	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1122	SizeTmp2, Result);
				1123	} else {
				1124	Result.SetKind(tok::hash); // '%:' -> '#'
				1125
				1126	// We parsed a # character. If this occurs at the start of the line,
				1127	// it's actually the start of a preprocessing directive. Callback to
				1128	// the preprocessor to handle it.
				1129	// FIXME: -fpreprocessed mode??
				1130	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1131	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1132	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1133
				1134	// As an optimization, if the preprocessor didn't switch lexers, tail
				1135	// recurse.
				1136	if (PP.isCurrentLexer(this)) {
				1137	// Start a new token. If this is a #include or something, the PP may
				1138	// want us starting at the beginning of the line again. If so, set
				1139	// the StartOfLine flag.
				1140	if (IsAtStartOfLine) {
				1141	Result.SetFlag(LexerToken::StartOfLine);
				1142	IsAtStartOfLine = false;
				1143	}
				1144	goto LexNextToken; // GCC isn't tail call eliminating.
				1145	}
				1146
				1147	return PP.Lex(Result);
				1148	}
				1149	}
				1150	} else {
				1151	Result.SetKind(tok::percent);
				1152	}
				1153	break;
				1154	case '<':
				1155	Char = getCharAndSize(CurPtr, SizeTmp);
				1156	if (ParsingFilename) {
				1157	return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
				1158	} else if (Char == '<' &&
				1159	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1160	Result.SetKind(tok::lesslessequal);
				1161	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1162	SizeTmp2, Result);
				1163	} else if (Char == '<') {
				1164	Result.SetKind(tok::lessless);
				1165	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1166	} else if (Char == '=') {
				1167	Result.SetKind(tok::lessequal);
				1168	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1169	} else if (Features.Digraphs && Char == ':') {
				1170	Result.SetKind(tok::l_square); // '<:' -> '['
				1171	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1172	} else if (Features.Digraphs && Char == '>') {
				1173	Result.SetKind(tok::l_brace); // '<%' -> '{'
				1174	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1175	} else if (Features.CPPMinMax && Char == '?') { // <?
				1176	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1177	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1178
				1179	if (getCharAndSize(CurPtr, SizeTmp) == '=') { // <?=
				1180	Result.SetKind(tok::lessquestionequal);
				1181	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1182	} else {
				1183	Result.SetKind(tok::lessquestion);
				1184	}
				1185	} else {
				1186	Result.SetKind(tok::less);
				1187	}
				1188	break;
				1189	case '>':
				1190	Char = getCharAndSize(CurPtr, SizeTmp);
				1191	if (Char == '=') {
				1192	Result.SetKind(tok::greaterequal);
				1193	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1194	} else if (Char == '>' &&
				1195	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1196	Result.SetKind(tok::greatergreaterequal);
				1197	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1198	SizeTmp2, Result);
				1199	} else if (Char == '>') {
				1200	Result.SetKind(tok::greatergreater);
				1201	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1202	} else if (Features.CPPMinMax && Char == '?') {
				1203	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1204	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1205
				1206	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1207	Result.SetKind(tok::greaterquestionequal); // >?=
				1208	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1209	} else {
				1210	Result.SetKind(tok::greaterquestion); // >?
				1211	}
				1212	} else {
				1213	Result.SetKind(tok::greater);
				1214	}
				1215	break;
				1216	case '^':
				1217	Char = getCharAndSize(CurPtr, SizeTmp);
				1218	if (Char == '=') {
				1219	Result.SetKind(tok::caretequal);
				1220	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1221	} else {
				1222	Result.SetKind(tok::caret);
				1223	}
				1224	break;
				1225	case '\|':
				1226	Char = getCharAndSize(CurPtr, SizeTmp);
				1227	if (Char == '=') {
				1228	Result.SetKind(tok::pipeequal);
				1229	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1230	} else if (Char == '\|') {
				1231	Result.SetKind(tok::pipepipe);
				1232	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1233	} else {
				1234	Result.SetKind(tok::pipe);
				1235	}
				1236	break;
				1237	case ':':
				1238	Char = getCharAndSize(CurPtr, SizeTmp);
				1239	if (Features.Digraphs && Char == '>') {
				1240	Result.SetKind(tok::r_square); // ':>' -> ']'
				1241	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1242	} else if (Features.CPlusPlus && Char == ':') {
				1243	Result.SetKind(tok::coloncolon);
				1244	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1245	} else {
				1246	Result.SetKind(tok::colon);
				1247	}
				1248	break;
				1249	case ';':
				1250	Result.SetKind(tok::semi);
				1251	break;
				1252	case '=':
				1253	Char = getCharAndSize(CurPtr, SizeTmp);
				1254	if (Char == '=') {
				1255	Result.SetKind(tok::equalequal);
				1256	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1257	} else {
				1258	Result.SetKind(tok::equal);
				1259	}
				1260	break;
				1261	case ',':
				1262	Result.SetKind(tok::comma);
				1263	break;
				1264	case '#':
				1265	Char = getCharAndSize(CurPtr, SizeTmp);
				1266	if (Char == '#') {
				1267	Result.SetKind(tok::hashhash);
				1268	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1269	} else {
				1270	Result.SetKind(tok::hash);
				1271	// We parsed a # character. If this occurs at the start of the line,
				1272	// it's actually the start of a preprocessing directive. Callback to
				1273	// the preprocessor to handle it.
				1274	// FIXME: not in preprocessed mode??
				1275	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1276	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1277	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1278
				1279	// As an optimization, if the preprocessor didn't switch lexers, tail
				1280	// recurse.
				1281	if (PP.isCurrentLexer(this)) {
				1282	// Start a new token. If this is a #include or something, the PP may
				1283	// want us starting at the beginning of the line again. If so, set
				1284	// the StartOfLine flag.
				1285	if (IsAtStartOfLine) {
				1286	Result.SetFlag(LexerToken::StartOfLine);
				1287	IsAtStartOfLine = false;
				1288	}
				1289	goto LexNextToken; // GCC isn't tail call eliminating.
				1290	}
				1291	return PP.Lex(Result);
				1292	}
				1293	}
				1294	break;
				1295
				1296	case '\\':
				1297	// FIXME: handle UCN's.
				1298	// FALL THROUGH.
				1299	default:
				1300	// Objective C support.
				1301	if (CurPtr[-1] == '@' && Features.ObjC1) {
				1302	Result.SetKind(tok::at);
				1303	break;
				1304	} else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1305	Diag(CurPtr-1, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1306	return LexIdentifier(Result, CurPtr);
				1307	}
				1308
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1309	if (!PP.isSkipping()) Diag(CurPtr-1, diag::err_stray_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1310	BufferPtr = CurPtr;
				1311	goto LexNextToken; // GCC isn't tail call eliminating.
				1312	}
				1313
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1314	// Update the location of token as well as BufferPtr.
				1315	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1316	}