Blame - clang/Lex/Lexer.cpp - toolchain/llvm-project

blob: b183c4d1ea9d392fa8c15118d9698afac2a8e80e [file] [log] [blame]

Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1	//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the Lexer and LexerToken interfaces.
				11	//
				12	//===----------------------------------------------------------------------===//
				13	//
				14	// TODO: GCC Diagnostics emitted by the lexer:
				15	// PEDWARN: (form feed\|vertical tab) in preprocessing directive
				16	//
				17	// Universal characters, unicode, char mapping:
				18	// WARNING: `%.*s' is not in NFKC
				19	// WARNING: `%.*s' is not in NFC
				20	//
				21	// Other:
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	22	// TODO: Options to support:
				23	// -fexec-charset,-fwide-exec-charset
				24	//
				25	//===----------------------------------------------------------------------===//
				26
				27	#include "clang/Lex/Lexer.h"
				28	#include "clang/Lex/Preprocessor.h"
				29	#include "clang/Basic/Diagnostic.h"
				30	#include "clang/Basic/SourceBuffer.h"
				31	#include "clang/Basic/SourceLocation.h"
				32	#include "llvm/Config/alloca.h"
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	33	#include <cctype>
				34	#include <iostream>
				35	using namespace llvm;
				36	using namespace clang;
				37
				38	static void InitCharacterInfo();
				39
				40	Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp)
				41	: BufferPtr(File->getBufferStart()), BufferStart(BufferPtr),
				42	BufferEnd(File->getBufferEnd()), InputFile(File), CurFileID(fileid), PP(pp),
				43	Features(PP.getLangOptions()) {
				44	InitCharacterInfo();
				45
				46	assert(BufferEnd[0] == 0 &&
				47	"We assume that the input buffer has a null character at the end"
				48	" to simplify lexing!");
				49
				50	// Start of the file is a start of line.
				51	IsAtStartOfLine = true;
				52
				53	// We are not after parsing a #.
				54	ParsingPreprocessorDirective = false;
				55
				56	// We are not after parsing #include.
				57	ParsingFilename = false;
				58	}
				59
				60	//===----------------------------------------------------------------------===//
				61	// LexerToken implementation.
				62	//===----------------------------------------------------------------------===//
				63
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	64	//===----------------------------------------------------------------------===//
				65	// Character information.
				66	//===----------------------------------------------------------------------===//
				67
				68	static unsigned char CharInfo[256];
				69
				70	enum {
				71	CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
				72	CHAR_VERT_WS = 0x02, // '\r', '\n'
				73	CHAR_LETTER = 0x04, // a-z,A-Z
				74	CHAR_NUMBER = 0x08, // 0-9
				75	CHAR_UNDER = 0x10, // _
				76	CHAR_PERIOD = 0x20 // .
				77	};
				78
				79	static void InitCharacterInfo() {
				80	static bool isInited = false;
				81	if (isInited) return;
				82	isInited = true;
				83
				84	// Intiialize the CharInfo table.
				85	// TODO: statically initialize this.
				86	CharInfo[(int)' '] = CharInfo[(int)'\t'] =
				87	CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
				88	CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
				89
				90	CharInfo[(int)'_'] = CHAR_UNDER;
				91	for (unsigned i = 'a'; i <= 'z'; ++i)
				92	CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
				93	for (unsigned i = '0'; i <= '9'; ++i)
				94	CharInfo[i] = CHAR_NUMBER;
				95	}
				96
				97	/// isIdentifierBody - Return true if this is the body character of an
				98	/// identifier, which is [a-zA-Z0-9_].
				99	static inline bool isIdentifierBody(unsigned char c) {
				100	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER);
				101	}
				102
				103	/// isHorizontalWhitespace - Return true if this character is horizontal
				104	/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
				105	static inline bool isHorizontalWhitespace(unsigned char c) {
				106	return CharInfo[c] & CHAR_HORZ_WS;
				107	}
				108
				109	/// isWhitespace - Return true if this character is horizontal or vertical
				110	/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
				111	/// for '\0'.
				112	static inline bool isWhitespace(unsigned char c) {
				113	return CharInfo[c] & (CHAR_HORZ_WS\|CHAR_VERT_WS);
				114	}
				115
				116	/// isNumberBody - Return true if this is the body character of an
				117	/// preprocessing number, which is [a-zA-Z0-9_.].
				118	static inline bool isNumberBody(unsigned char c) {
				119	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER\|CHAR_PERIOD);
				120	}
				121
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	122
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	123	//===----------------------------------------------------------------------===//
				124	// Diagnostics forwarding code.
				125	//===----------------------------------------------------------------------===//
				126
				127	/// getSourceLocation - Return a source location identifier for the specified
				128	/// offset in the current file.
				129	SourceLocation Lexer::getSourceLocation(const char *Loc) const {
				130	assert(Loc >= InputFile->getBufferStart() && Loc <= InputFile->getBufferEnd()
				131	&& "Location out of range for this buffer!");
				132	return SourceLocation(CurFileID, Loc-InputFile->getBufferStart());
				133	}
				134
				135
				136	/// Diag - Forwarding function for diagnostics. This translate a source
				137	/// position in the current buffer into a SourceLocation object for rendering.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	138	void Lexer::Diag(const char *Loc, unsigned DiagID,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	139	const std::string &Msg) const {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	140	PP.Diag(getSourceLocation(Loc), DiagID, Msg);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	141	}
				142
				143	//===----------------------------------------------------------------------===//
				144	// Trigraph and Escaped Newline Handling Code.
				145	//===----------------------------------------------------------------------===//
				146
				147	/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
				148	/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
				149	static char GetTrigraphCharForLetter(char Letter) {
				150	switch (Letter) {
				151	default: return 0;
				152	case '=': return '#';
				153	case ')': return ']';
				154	case '(': return '[';
				155	case '!': return '\|';
				156	case '\'': return '^';
				157	case '>': return '}';
				158	case '/': return '\\';
				159	case '<': return '{';
				160	case '-': return '~';
				161	}
				162	}
				163
				164	/// DecodeTrigraphChar - If the specified character is a legal trigraph when
				165	/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled,
				166	/// return the result character. Finally, emit a warning about trigraph use
				167	/// whether trigraphs are enabled or not.
				168	static char DecodeTrigraphChar(const char CP, Lexer L) {
				169	char Res = GetTrigraphCharForLetter(*CP);
				170	if (Res && L) {
				171	if (!L->getFeatures().Trigraphs) {
				172	L->Diag(CP-2, diag::trigraph_ignored);
				173	return 0;
				174	} else {
				175	L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
				176	}
				177	}
				178	return Res;
				179	}
				180
				181	/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
				182	/// get its size, and return it. This is tricky in several cases:
				183	/// 1. If currently at the start of a trigraph, we warn about the trigraph,
				184	/// then either return the trigraph (skipping 3 chars) or the '?',
				185	/// depending on whether trigraphs are enabled or not.
				186	/// 2. If this is an escaped newline (potentially with whitespace between
				187	/// the backslash and newline), implicitly skip the newline and return
				188	/// the char after it.
				189	/// 3. If this is a UCN, return it. FIXME: for C++?
				190	///
				191	/// This handles the slow/uncommon case of the getCharAndSize method. Here we
				192	/// know that we can accumulate into Size, and that we have already incremented
				193	/// Ptr by Size bytes.
				194	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	195	/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
				196	/// be updated to match.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	197	///
				198	char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
				199	LexerToken *Tok) {
				200	// If we have a slash, look for an escaped newline.
				201	if (Ptr[0] == '\\') {
				202	++Size;
				203	++Ptr;
				204	Slash:
				205	// Common case, backslash-char where the char is not whitespace.
				206	if (!isWhitespace(Ptr[0])) return '\\';
				207
				208	// See if we have optional whitespace characters followed by a newline.
				209	{
				210	unsigned SizeTmp = 0;
				211	do {
				212	++SizeTmp;
				213	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				214	// Remember that this token needs to be cleaned.
				215	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				216
				217	// Warn if there was whitespace between the backslash and newline.
				218	if (SizeTmp != 1 && Tok)
				219	Diag(Ptr, diag::backslash_newline_space);
				220
				221	// If this is a \r\n or \n\r, skip the newlines.
				222	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				223	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				224	++SizeTmp;
				225
				226	// Found backslash<whitespace><newline>. Parse the char after it.
				227	Size += SizeTmp;
				228	Ptr += SizeTmp;
				229	// Use slow version to accumulate a correct size field.
				230	return getCharAndSizeSlow(Ptr, Size, Tok);
				231	}
				232	} while (isWhitespace(Ptr[SizeTmp]));
				233	}
				234
				235	// Otherwise, this is not an escaped newline, just return the slash.
				236	return '\\';
				237	}
				238
				239	// If this is a trigraph, process it.
				240	if (Ptr[0] == '?' && Ptr[1] == '?') {
				241	// If this is actually a legal trigraph (not something like "??x"), emit
				242	// a trigraph warning. If so, and if trigraphs are enabled, return it.
				243	if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
				244	// Remember that this token needs to be cleaned.
				245	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				246
				247	Ptr += 3;
				248	Size += 3;
				249	if (C == '\\') goto Slash;
				250	return C;
				251	}
				252	}
				253
				254	// If this is neither, return a single character.
				255	++Size;
				256	return *Ptr;
				257	}
				258
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	259
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	260	/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
				261	/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
				262	/// and that we have already incremented Ptr by Size bytes.
				263	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	264	/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
				265	/// be updated to match.
				266	char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	267	const LangOptions &Features) {
				268	// If we have a slash, look for an escaped newline.
				269	if (Ptr[0] == '\\') {
				270	++Size;
				271	++Ptr;
				272	Slash:
				273	// Common case, backslash-char where the char is not whitespace.
				274	if (!isWhitespace(Ptr[0])) return '\\';
				275
				276	// See if we have optional whitespace characters followed by a newline.
				277	{
				278	unsigned SizeTmp = 0;
				279	do {
				280	++SizeTmp;
				281	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				282
				283	// If this is a \r\n or \n\r, skip the newlines.
				284	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				285	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				286	++SizeTmp;
				287
				288	// Found backslash<whitespace><newline>. Parse the char after it.
				289	Size += SizeTmp;
				290	Ptr += SizeTmp;
				291
				292	// Use slow version to accumulate a correct size field.
				293	return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
				294	}
				295	} while (isWhitespace(Ptr[SizeTmp]));
				296	}
				297
				298	// Otherwise, this is not an escaped newline, just return the slash.
				299	return '\\';
				300	}
				301
				302	// If this is a trigraph, process it.
				303	if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
				304	// If this is actually a legal trigraph (not something like "??x"), return
				305	// it.
				306	if (char C = GetTrigraphCharForLetter(Ptr[2])) {
				307	Ptr += 3;
				308	Size += 3;
				309	if (C == '\\') goto Slash;
				310	return C;
				311	}
				312	}
				313
				314	// If this is neither, return a single character.
				315	++Size;
				316	return *Ptr;
				317	}
				318
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	319	//===----------------------------------------------------------------------===//
				320	// Helper methods for lexing.
				321	//===----------------------------------------------------------------------===//
				322
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	323	void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	324	// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
				325	unsigned Size;
				326	unsigned char C = *CurPtr++;
				327	while (isIdentifierBody(C)) {
				328	C = *CurPtr++;
				329	}
				330	--CurPtr; // Back up over the skipped character.
				331
				332	// Fast path, no $,\,? in identifier found. '\' might be an escaped newline
				333	// or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
				334	// FIXME: universal chars.
				335	if (C != '\\' && C != '?' && (C != '$' \|\| !Features.DollarIdents)) {
				336	FinishIdentifier:
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	337	const char IdStart = BufferPtr, IdEnd = CurPtr;
				338	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	339	Result.SetKind(tok::identifier);
				340
				341	// Look up this token, see if it is a macro, or if it is a language keyword.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	342	IdentifierTokenInfo *II;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	343	if (!Result.needsCleaning()) {
				344	// No cleaning needed, just use the characters from the lexed buffer.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	345	II = PP.getIdentifierInfo(IdStart, IdEnd);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	346	} else {
				347	// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
Chris Lattner	33ce728	2006-06-18 07:35:33 +0000	[diff] [blame]	348	char TmpBuf = (char)alloca(Result.getLength());
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	349	unsigned Size = PP.getSpelling(Result, TmpBuf);
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	350	II = PP.getIdentifierInfo(TmpBuf, TmpBuf+Size);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	351	}
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	352	Result.SetIdentifierInfo(II);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	353
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	354	// Finally, now that we know we have an identifier, pass this off to the
				355	// preprocessor, which may macro expand it or something.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	356	return PP.HandleIdentifier(Result);
				357	}
				358
				359	// Otherwise, $,\,? in identifier found. Enter slower path.
				360
				361	C = getCharAndSize(CurPtr, Size);
				362	while (1) {
				363	if (C == '$') {
				364	// If we hit a $ and they are not supported in identifiers, we are done.
				365	if (!Features.DollarIdents) goto FinishIdentifier;
				366
				367	// Otherwise, emit a diagnostic and continue.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	368	Diag(CurPtr, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	369	CurPtr = ConsumeChar(CurPtr, Size, Result);
				370	C = getCharAndSize(CurPtr, Size);
				371	continue;
				372	} else if (!isIdentifierBody(C)) { // FIXME: universal chars.
				373	// Found end of identifier.
				374	goto FinishIdentifier;
				375	}
				376
				377	// Otherwise, this character is good, consume it.
				378	CurPtr = ConsumeChar(CurPtr, Size, Result);
				379
				380	C = getCharAndSize(CurPtr, Size);
				381	while (isIdentifierBody(C)) { // FIXME: universal chars.
				382	CurPtr = ConsumeChar(CurPtr, Size, Result);
				383	C = getCharAndSize(CurPtr, Size);
				384	}
				385	}
				386	}
				387
				388
				389	/// LexNumericConstant - Lex the remainer of a integer or floating point
				390	/// constant. From[-1] is the first character lexed. Return the end of the
				391	/// constant.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	392	void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	393	unsigned Size;
				394	char C = getCharAndSize(CurPtr, Size);
				395	char PrevCh = 0;
				396	while (isNumberBody(C)) { // FIXME: universal chars?
				397	CurPtr = ConsumeChar(CurPtr, Size, Result);
				398	PrevCh = C;
				399	C = getCharAndSize(CurPtr, Size);
				400	}
				401
				402	// If we fell out, check for a sign, due to 1e+12. If we have one, continue.
				403	if ((C == '-' \|\| C == '+') && (PrevCh == 'E' \|\| PrevCh == 'e'))
				404	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				405
				406	// If we have a hex FP constant, continue.
				407	if (Features.HexFloats &&
				408	(C == '-' \|\| C == '+') && (PrevCh == 'P' \|\| PrevCh == 'p'))
				409	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				410
				411	Result.SetKind(tok::numeric_constant);
				412
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	413	// Update the location of token as well as BufferPtr.
				414	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	415	}
				416
				417	/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
				418	/// either " or L".
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	419	void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	420	const char *NulCharacter = 0; // Does this string contain the \0 character?
				421
				422	char C = getAndAdvanceChar(CurPtr, Result);
				423	while (C != '"') {
				424	// Skip escaped characters.
				425	if (C == '\\') {
				426	// Skip the escaped character.
				427	C = getAndAdvanceChar(CurPtr, Result);
				428	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				429	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	430	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	431	BufferPtr = CurPtr-1;
				432	return LexTokenInternal(Result);
				433	} else if (C == 0) {
				434	NulCharacter = CurPtr-1;
				435	}
				436	C = getAndAdvanceChar(CurPtr, Result);
				437	}
				438
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	439	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	440
				441	Result.SetKind(tok::string_literal);
				442
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	443	// Update the location of the token as well as the BufferPtr instance var.
				444	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	445	}
				446
				447	/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
				448	/// after having lexed the '<' character. This is used for #include filenames.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	449	void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	450	const char *NulCharacter = 0; // Does this string contain the \0 character?
				451
				452	char C = getAndAdvanceChar(CurPtr, Result);
				453	while (C != '>') {
				454	// Skip escaped characters.
				455	if (C == '\\') {
				456	// Skip the escaped character.
				457	C = getAndAdvanceChar(CurPtr, Result);
				458	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				459	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	460	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	461	BufferPtr = CurPtr-1;
				462	return LexTokenInternal(Result);
				463	} else if (C == 0) {
				464	NulCharacter = CurPtr-1;
				465	}
				466	C = getAndAdvanceChar(CurPtr, Result);
				467	}
				468
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	469	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	470
				471	Result.SetKind(tok::angle_string_literal);
				472
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	473	// Update the location of token as well as BufferPtr.
				474	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	475	}
				476
				477
				478	/// LexCharConstant - Lex the remainder of a character constant, after having
				479	/// lexed either ' or L'.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	480	void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	481	const char *NulCharacter = 0; // Does this character contain the \0 character?
				482
				483	// Handle the common case of 'x' and '\y' efficiently.
				484	char C = getAndAdvanceChar(CurPtr, Result);
				485	if (C == '\'') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	486	Diag(BufferPtr, diag::err_empty_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	487	BufferPtr = CurPtr;
				488	return LexTokenInternal(Result);
				489	} else if (C == '\\') {
				490	// Skip the escaped character.
				491	// FIXME: UCN's.
				492	C = getAndAdvanceChar(CurPtr, Result);
				493	}
				494
				495	if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
				496	++CurPtr;
				497	} else {
				498	// Fall back on generic code for embedded nulls, newlines, wide chars.
				499	do {
				500	// Skip escaped characters.
				501	if (C == '\\') {
				502	// Skip the escaped character.
				503	C = getAndAdvanceChar(CurPtr, Result);
				504	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				505	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	506	Diag(BufferPtr, diag::err_unterminated_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	507	BufferPtr = CurPtr-1;
				508	return LexTokenInternal(Result);
				509	} else if (C == 0) {
				510	NulCharacter = CurPtr-1;
				511	}
				512	C = getAndAdvanceChar(CurPtr, Result);
				513	} while (C != '\'');
				514	}
				515
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	516	if (NulCharacter) Diag(NulCharacter, diag::null_in_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	517
				518	Result.SetKind(tok::char_constant);
				519
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	520	// Update the location of token as well as BufferPtr.
				521	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	522	}
				523
				524	/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
				525	/// Update BufferPtr to point to the next non-whitespace character and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	526	void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	527	// Whitespace - Skip it, then return the token after the whitespace.
				528	unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently.
				529	while (1) {
				530	// Skip horizontal whitespace very aggressively.
				531	while (isHorizontalWhitespace(Char))
				532	Char = *++CurPtr;
				533
				534	// Otherwise if we something other than whitespace, we're done.
				535	if (Char != '\n' && Char != '\r')
				536	break;
				537
				538	if (ParsingPreprocessorDirective) {
				539	// End of preprocessor directive line, let LexTokenInternal handle this.
				540	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	541	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	542	}
				543
				544	// ok, but handle newline.
				545	// The returned token is at the start of the line.
				546	Result.SetFlag(LexerToken::StartOfLine);
				547	// No leading whitespace seen so far.
				548	Result.ClearFlag(LexerToken::LeadingSpace);
				549	Char = *++CurPtr;
				550	}
				551
				552	// If this isn't immediately after a newline, there is leading space.
				553	char PrevChar = CurPtr[-1];
				554	if (PrevChar != '\n' && PrevChar != '\r')
				555	Result.SetFlag(LexerToken::LeadingSpace);
				556
				557	// If the next token is obviously a // or /* */ comment, skip it efficiently
				558	// too (without going through the big switch stmt).
				559	if (Char == '/' && CurPtr[1] == '/') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	560	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	561	return SkipBCPLComment(Result, CurPtr+1);
				562	}
				563	if (Char == '/' && CurPtr[1] == '*') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	564	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	565	return SkipBlockComment(Result, CurPtr+2);
				566	}
				567	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	568	}
				569
				570	// SkipBCPLComment - We have just read the // characters from input. Skip until
				571	// we find the newline character thats terminate the comment. Then update
				572	/// BufferPtr and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	573	void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	574	// If BCPL comments aren't explicitly enabled for this language, emit an
				575	// extension warning.
				576	if (!Features.BCPLComment) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	577	Diag(BufferPtr, diag::ext_bcpl_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	578
				579	// Mark them enabled so we only emit one warning for this translation
				580	// unit.
				581	Features.BCPLComment = true;
				582	}
				583
				584	// Scan over the body of the comment. The common case, when scanning, is that
				585	// the comment contains normal ascii characters with nothing interesting in
				586	// them. As such, optimize for this case with the inner loop.
				587	char C;
				588	do {
				589	C = *CurPtr;
				590	// FIXME: just scan for a \n or \r character. If we find a \n character,
				591	// scan backwards, checking to see if it's an escaped newline, like we do
				592	// for block comments.
				593
				594	// Skip over characters in the fast loop.
				595	while (C != 0 && // Potentially EOF.
				596	C != '\\' && // Potentially escaped newline.
				597	C != '?' && // Potentially trigraph.
				598	C != '\n' && C != '\r') // Newline or DOS-style newline.
				599	C = *++CurPtr;
				600
				601	// If this is a newline, we're done.
				602	if (C == '\n' \|\| C == '\r')
				603	break; // Found the newline? Break out!
				604
				605	// Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
				606	// properly decode the character.
				607	const char *OldPtr = CurPtr;
				608	C = getAndAdvanceChar(CurPtr, Result);
				609
				610	// If we read multiple characters, and one of those characters was a \r or
				611	// \n, then we had an escaped newline within the comment. Emit diagnostic.
				612	if (CurPtr != OldPtr+1) {
				613	for (; OldPtr != CurPtr; ++OldPtr)
				614	if (OldPtr[0] == '\n' \|\| OldPtr[0] == '\r') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	615	Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
				616	break;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	617	}
				618	}
				619
				620	if (CurPtr == BufferEnd+1) goto FoundEOF;
				621	} while (C != '\n' && C != '\r');
				622
				623	// Found and did not consume a newline.
				624
				625	// If we are inside a preprocessor directive and we see the end of line,
				626	// return immediately, so that the lexer can return this as an EOM token.
				627	if (ParsingPreprocessorDirective) {
				628	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	629	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	630	}
				631
				632	// Otherwise, eat the \n character. We don't care if this is a \n\r or
				633	// \r\n sequence.
				634	++CurPtr;
				635
				636	// The next returned token is at the start of the line.
				637	Result.SetFlag(LexerToken::StartOfLine);
				638	// No leading whitespace seen so far.
				639	Result.ClearFlag(LexerToken::LeadingSpace);
				640
				641	// It is common for the tokens immediately after a // comment to be
				642	// whitespace (indentation for the next line). Instead of going through the
				643	// big switch, handle it efficiently now.
				644	if (isWhitespace(*CurPtr)) {
				645	Result.SetFlag(LexerToken::LeadingSpace);
				646	return SkipWhitespace(Result, CurPtr+1);
				647	}
				648
				649	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	650	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	651
				652	FoundEOF: // If we ran off the end of the buffer, return EOF.
				653	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	654	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	655	}
				656
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	657	/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
				658	/// character (either \n or \r) is part of an escaped newline sequence. Issue a
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	659	/// diagnostic if so. We know that the is inside of a block comment.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	660	static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
				661	Lexer *L) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	662	assert(CurPtr[0] == '\n' \|\| CurPtr[0] == '\r');
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	663
				664	// Back up off the newline.
				665	--CurPtr;
				666
				667	// If this is a two-character newline sequence, skip the other character.
				668	if (CurPtr[0] == '\n' \|\| CurPtr[0] == '\r') {
				669	// \n\n or \r\r -> not escaped newline.
				670	if (CurPtr[0] == CurPtr[1])
				671	return false;
				672	// \n\r or \r\n -> skip the newline.
				673	--CurPtr;
				674	}
				675
				676	// If we have horizontal whitespace, skip over it. We allow whitespace
				677	// between the slash and newline.
				678	bool HasSpace = false;
				679	while (isHorizontalWhitespace(CurPtr) \|\| CurPtr == 0) {
				680	--CurPtr;
				681	HasSpace = true;
				682	}
				683
				684	// If we have a slash, we know this is an escaped newline.
				685	if (*CurPtr == '\\') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	686	if (CurPtr[-1] != '*') return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	687	} else {
				688	// It isn't a slash, is it the ?? / trigraph?
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	689	if (CurPtr[0] != '/' \|\| CurPtr[-1] != '?' \|\| CurPtr[-2] != '?' \|\|
				690	CurPtr[-3] != '*')
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	691	return false;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	692
				693	// This is the trigraph ending the comment. Emit a stern warning!
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	694	CurPtr -= 2;
				695
				696	// If no trigraphs are enabled, warn that we ignored this trigraph and
				697	// ignore this * character.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	698	if (!L->getFeatures().Trigraphs) {
				699	L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	700	return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	701	}
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	702	L->Diag(CurPtr, diag::trigraph_ends_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	703	}
				704
				705	// Warn about having an escaped newline between the */ characters.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	706	L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	707
				708	// If there was space between the backslash and newline, warn about it.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	709	if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	710
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	711	return true;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	712	}
				713
				714	/// SkipBlockComment - We have just read the /* characters from input. Read
				715	/// until we find the */ characters that terminate the comment. Note that we
				716	/// don't bother decoding trigraphs or escaped newlines in block comments,
				717	/// because they cannot cause the comment to end. The only thing that can
				718	/// happen is the comment could end with an escaped newline between the */ end
				719	/// of comment.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	720	void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	721	// Scan one character past where we should, looking for a '/' character. Once
				722	// we find it, check to see if it was preceeded by a *. This common
				723	// optimization helps people who like to put a lot of * characters in their
				724	// comments.
				725	unsigned char C = *CurPtr++;
				726	if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	727	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	728	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	729	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	730	}
				731
				732	while (1) {
				733	// Skip over all non-interesting characters.
				734	// TODO: Vectorize this. Note: memchr on Darwin is slower than this loop.
				735	while (C != '/' && C != '\0')
				736	C = *CurPtr++;
				737
				738	if (C == '/') {
				739	char T;
				740	if (CurPtr[-2] == '') // We found the final /. We're done!
				741	break;
				742
				743	if ((CurPtr[-2] == '\n' \|\| CurPtr[-2] == '\r')) {
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	744	if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	745	// We found the final */, though it had an escaped newline between the
				746	// * and /. We're done!
				747	break;
				748	}
				749	}
				750	if (CurPtr[0] == '*' && CurPtr[1] != '/') {
				751	// If this is a /* inside of the comment, emit a warning. Don't do this
				752	// if this is a /*/, which will end the comment. This misses cases with
				753	// embedded escaped newlines, but oh well.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	754	Diag(CurPtr-1, diag::nested_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	755	}
				756	} else if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	757	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	758	// Note: the user probably forgot a */. We could continue immediately
				759	// after the /*, but this would involve lexing a lot of what really is the
				760	// comment, which surely would confuse the parser.
				761	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	762	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	763	}
				764	C = *CurPtr++;
				765	}
				766
				767	// It is common for the tokens immediately after a /**/ comment to be
				768	// whitespace. Instead of going through the big switch, handle it
				769	// efficiently now.
				770	if (isHorizontalWhitespace(*CurPtr)) {
				771	Result.SetFlag(LexerToken::LeadingSpace);
				772	return SkipWhitespace(Result, CurPtr+1);
				773	}
				774
				775	// Otherwise, just return so that the next character will be lexed as a token.
				776	BufferPtr = CurPtr;
				777	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	778	}
				779
				780	//===----------------------------------------------------------------------===//
				781	// Primary Lexing Entry Points
				782	//===----------------------------------------------------------------------===//
				783
				784	/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
				785	/// (potentially) macro expand the filename.
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame^]	786	std::string Lexer::LexIncludeFilename(LexerToken &FilenameTok) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	787	assert(ParsingPreprocessorDirective &&
				788	ParsingFilename == false &&
				789	"Must be in a preprocessing directive!");
				790
				791	// We are now parsing a filename!
				792	ParsingFilename = true;
				793
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame^]	794	// Lex the filename.
				795	Lex(FilenameTok);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	796
				797	// We should have gotten the filename now.
				798	ParsingFilename = false;
				799
				800	// No filename?
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame^]	801	if (FilenameTok.getKind() == tok::eom) {
				802	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				803	return "";
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	804	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	805
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame^]	806	// Get the text form of the filename.
				807	std::string Filename = PP.getSpelling(FilenameTok);
				808	assert(!Filename.empty() && "Can't have tokens with empty spellings!");
				809
				810	// Make sure the filename is <x> or "x".
				811	if (Filename[0] == '<') {
				812	if (Filename[Filename.size()-1] != '>') {
				813	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				814	FilenameTok.SetKind(tok::eom);
				815	return "";
				816	}
				817	} else if (Filename[0] == '"') {
				818	if (Filename[Filename.size()-1] != '"') {
				819	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				820	FilenameTok.SetKind(tok::eom);
				821	return "";
				822	}
				823	} else {
				824	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				825	FilenameTok.SetKind(tok::eom);
				826	return "";
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	827	}
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame^]	828
				829	// Diagnose #include "" as invalid.
				830	if (Filename.size() == 2) {
				831	PP.Diag(FilenameTok, diag::err_pp_empty_filename);
				832	FilenameTok.SetKind(tok::eom);
				833	return "";
				834	}
				835
				836	return Filename;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	837	}
				838
				839	/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
				840	/// uninterpreted string. This switches the lexer out of directive mode.
				841	std::string Lexer::ReadToEndOfLine() {
				842	assert(ParsingPreprocessorDirective && ParsingFilename == false &&
				843	"Must be in a preprocessing directive!");
				844	std::string Result;
				845	LexerToken Tmp;
				846
				847	// CurPtr - Cache BufferPtr in an automatic variable.
				848	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	849	while (1) {
				850	char Char = getAndAdvanceChar(CurPtr, Tmp);
				851	switch (Char) {
				852	default:
				853	Result += Char;
				854	break;
				855	case 0: // Null.
				856	// Found end of file?
				857	if (CurPtr-1 != BufferEnd) {
				858	// Nope, normal character, continue.
				859	Result += Char;
				860	break;
				861	}
				862	// FALL THROUGH.
				863	case '\r':
				864	case '\n':
				865	// Okay, we found the end of the line. First, back up past the \0, \r, \n.
				866	assert(CurPtr[-1] == Char && "Trigraphs for newline?");
				867	BufferPtr = CurPtr-1;
				868
				869	// Next, lex the character, which should handle the EOM transition.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	870	Lex(Tmp);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	871	assert(Tmp.getKind() == tok::eom && "Unexpected token!");
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	872
				873	// Finally, we're done, return the string we found.
				874	return Result;
				875	}
				876	}
				877	}
				878
				879	/// LexEndOfFile - CurPtr points to the end of this file. Handle this
				880	/// condition, reporting diagnostics and handling other edge cases as required.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	881	void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	882	// If we hit the end of the file while parsing a preprocessor directive,
				883	// end the preprocessor directive first. The next token returned will
				884	// then be the end of file.
				885	if (ParsingPreprocessorDirective) {
				886	// Done parsing the "line".
				887	ParsingPreprocessorDirective = false;
				888	Result.SetKind(tok::eom);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	889	// Update the location of token as well as BufferPtr.
				890	FormTokenWithChars(Result, CurPtr);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	891	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	892	}
				893
				894	// If we are in a #if directive, emit an error.
				895	while (!ConditionalStack.empty()) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	896	PP.Diag(ConditionalStack.back().IfLoc,
				897	diag::err_pp_unterminated_conditional);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	898	ConditionalStack.pop_back();
				899	}
				900
				901	// If the file was empty or didn't end in a newline, issue a pedwarn.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	902	if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
				903	Diag(BufferEnd, diag::ext_no_newline_eof);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	904
				905	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	906	PP.HandleEndOfFile(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	907	}
				908
				909
				910	/// LexTokenInternal - This implements a simple C family lexer. It is an
				911	/// extremely performance critical piece of code. This assumes that the buffer
				912	/// has a null character at the end of the file. Return true if an error
				913	/// occurred and compilation should terminate, false if normal. This returns a
				914	/// preprocessing token, not a normal token, as such, it is an internal
				915	/// interface. It assumes that the Flags of result have been cleared before
				916	/// calling this.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	917	void Lexer::LexTokenInternal(LexerToken &Result) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	918	LexNextToken:
				919	// New token, can't need cleaning yet.
				920	Result.ClearFlag(LexerToken::NeedsCleaning);
				921
				922	// CurPtr - Cache BufferPtr in an automatic variable.
				923	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	924
				925	unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
				926
				927	// Read a character, advancing over it.
				928	char Char = getAndAdvanceChar(CurPtr, Result);
				929	switch (Char) {
				930	case 0: // Null.
				931	// Found end of file?
				932	if (CurPtr-1 == BufferEnd)
				933	return LexEndOfFile(Result, CurPtr-1); // Retreat back into the file.
				934
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	935	Diag(CurPtr-1, diag::null_in_file);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	936	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	937	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	938	goto LexNextToken; // GCC isn't tail call eliminating.
				939	case '\n':
				940	case '\r':
				941	// If we are inside a preprocessor directive and we see the end of line,
				942	// we know we are done with the directive, so return an EOM token.
				943	if (ParsingPreprocessorDirective) {
				944	// Done parsing the "line".
				945	ParsingPreprocessorDirective = false;
				946
				947	// Since we consumed a newline, we are back at the start of a line.
				948	IsAtStartOfLine = true;
				949
				950	Result.SetKind(tok::eom);
				951	break;
				952	}
				953	// The returned token is at the start of the line.
				954	Result.SetFlag(LexerToken::StartOfLine);
				955	// No leading whitespace seen so far.
				956	Result.ClearFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	957	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	958	goto LexNextToken; // GCC isn't tail call eliminating.
				959	case ' ':
				960	case '\t':
				961	case '\f':
				962	case '\v':
				963	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	964	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	965	goto LexNextToken; // GCC isn't tail call eliminating.
				966
				967	case 'L':
				968	Char = getCharAndSize(CurPtr, SizeTmp);
				969
				970	// Wide string literal.
				971	if (Char == '"')
				972	return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				973
				974	// Wide character constant.
				975	if (Char == '\'')
				976	return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				977	// FALL THROUGH, treating L like the start of an identifier.
				978
				979	// C99 6.4.2: Identifiers.
				980	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
				981	case 'H': case 'I': case 'J': case 'K': /'L'/case 'M': case 'N':
				982	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
				983	case 'V': case 'W': case 'X': case 'Y': case 'Z':
				984	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
				985	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
				986	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
				987	case 'v': case 'w': case 'x': case 'y': case 'z':
				988	case '_':
				989	return LexIdentifier(Result, CurPtr);
				990
				991	// C99 6.4.4.1: Integer Constants.
				992	// C99 6.4.4.2: Floating Constants.
				993	case '0': case '1': case '2': case '3': case '4':
				994	case '5': case '6': case '7': case '8': case '9':
				995	return LexNumericConstant(Result, CurPtr);
				996
				997	// C99 6.4.4: Character Constants.
				998	case '\'':
				999	return LexCharConstant(Result, CurPtr);
				1000
				1001	// C99 6.4.5: String Literals.
				1002	case '"':
				1003	return LexStringLiteral(Result, CurPtr);
				1004
				1005	// C99 6.4.6: Punctuators.
				1006	case '?':
				1007	Result.SetKind(tok::question);
				1008	break;
				1009	case '[':
				1010	Result.SetKind(tok::l_square);
				1011	break;
				1012	case ']':
				1013	Result.SetKind(tok::r_square);
				1014	break;
				1015	case '(':
				1016	Result.SetKind(tok::l_paren);
				1017	break;
				1018	case ')':
				1019	Result.SetKind(tok::r_paren);
				1020	break;
				1021	case '{':
				1022	Result.SetKind(tok::l_brace);
				1023	break;
				1024	case '}':
				1025	Result.SetKind(tok::r_brace);
				1026	break;
				1027	case '.':
				1028	Char = getCharAndSize(CurPtr, SizeTmp);
				1029	if (Char >= '0' && Char <= '9') {
				1030	return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				1031	} else if (Features.CPlusPlus && Char == '*') {
				1032	Result.SetKind(tok::periodstar);
				1033	CurPtr += SizeTmp;
				1034	} else if (Char == '.' &&
				1035	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
				1036	Result.SetKind(tok::ellipsis);
				1037	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1038	SizeTmp2, Result);
				1039	} else {
				1040	Result.SetKind(tok::period);
				1041	}
				1042	break;
				1043	case '&':
				1044	Char = getCharAndSize(CurPtr, SizeTmp);
				1045	if (Char == '&') {
				1046	Result.SetKind(tok::ampamp);
				1047	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1048	} else if (Char == '=') {
				1049	Result.SetKind(tok::ampequal);
				1050	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1051	} else {
				1052	Result.SetKind(tok::amp);
				1053	}
				1054	break;
				1055	case '*':
				1056	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1057	Result.SetKind(tok::starequal);
				1058	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1059	} else {
				1060	Result.SetKind(tok::star);
				1061	}
				1062	break;
				1063	case '+':
				1064	Char = getCharAndSize(CurPtr, SizeTmp);
				1065	if (Char == '+') {
				1066	Result.SetKind(tok::plusplus);
				1067	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1068	} else if (Char == '=') {
				1069	Result.SetKind(tok::plusequal);
				1070	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1071	} else {
				1072	Result.SetKind(tok::plus);
				1073	}
				1074	break;
				1075	case '-':
				1076	Char = getCharAndSize(CurPtr, SizeTmp);
				1077	if (Char == '-') {
				1078	Result.SetKind(tok::minusminus);
				1079	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1080	} else if (Char == '>' && Features.CPlusPlus &&
				1081	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
				1082	Result.SetKind(tok::arrowstar); // C++ ->*
				1083	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1084	SizeTmp2, Result);
				1085	} else if (Char == '>') {
				1086	Result.SetKind(tok::arrow);
				1087	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1088	} else if (Char == '=') {
				1089	Result.SetKind(tok::minusequal);
				1090	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1091	} else {
				1092	Result.SetKind(tok::minus);
				1093	}
				1094	break;
				1095	case '~':
				1096	Result.SetKind(tok::tilde);
				1097	break;
				1098	case '!':
				1099	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1100	Result.SetKind(tok::exclaimequal);
				1101	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1102	} else {
				1103	Result.SetKind(tok::exclaim);
				1104	}
				1105	break;
				1106	case '/':
				1107	// 6.4.9: Comments
				1108	Char = getCharAndSize(CurPtr, SizeTmp);
				1109	if (Char == '/') { // BCPL comment.
				1110	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1111	SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1112	goto LexNextToken; // GCC isn't tail call eliminating.
				1113	} else if (Char == '') { // /*/ comment.
				1114	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1115	SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1116	goto LexNextToken; // GCC isn't tail call eliminating.
				1117	} else if (Char == '=') {
				1118	Result.SetKind(tok::slashequal);
				1119	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1120	} else {
				1121	Result.SetKind(tok::slash);
				1122	}
				1123	break;
				1124	case '%':
				1125	Char = getCharAndSize(CurPtr, SizeTmp);
				1126	if (Char == '=') {
				1127	Result.SetKind(tok::percentequal);
				1128	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1129	} else if (Features.Digraphs && Char == '>') {
				1130	Result.SetKind(tok::r_brace); // '%>' -> '}'
				1131	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1132	} else if (Features.Digraphs && Char == ':') {
				1133	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1134	if (getCharAndSize(CurPtr, SizeTmp) == '%' &&
				1135	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
				1136	Result.SetKind(tok::hashhash); // '%:%:' -> '##'
				1137	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1138	SizeTmp2, Result);
				1139	} else {
				1140	Result.SetKind(tok::hash); // '%:' -> '#'
				1141
				1142	// We parsed a # character. If this occurs at the start of the line,
				1143	// it's actually the start of a preprocessing directive. Callback to
				1144	// the preprocessor to handle it.
				1145	// FIXME: -fpreprocessed mode??
				1146	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1147	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1148	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1149
				1150	// As an optimization, if the preprocessor didn't switch lexers, tail
				1151	// recurse.
				1152	if (PP.isCurrentLexer(this)) {
				1153	// Start a new token. If this is a #include or something, the PP may
				1154	// want us starting at the beginning of the line again. If so, set
				1155	// the StartOfLine flag.
				1156	if (IsAtStartOfLine) {
				1157	Result.SetFlag(LexerToken::StartOfLine);
				1158	IsAtStartOfLine = false;
				1159	}
				1160	goto LexNextToken; // GCC isn't tail call eliminating.
				1161	}
				1162
				1163	return PP.Lex(Result);
				1164	}
				1165	}
				1166	} else {
				1167	Result.SetKind(tok::percent);
				1168	}
				1169	break;
				1170	case '<':
				1171	Char = getCharAndSize(CurPtr, SizeTmp);
				1172	if (ParsingFilename) {
				1173	return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
				1174	} else if (Char == '<' &&
				1175	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1176	Result.SetKind(tok::lesslessequal);
				1177	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1178	SizeTmp2, Result);
				1179	} else if (Char == '<') {
				1180	Result.SetKind(tok::lessless);
				1181	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1182	} else if (Char == '=') {
				1183	Result.SetKind(tok::lessequal);
				1184	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1185	} else if (Features.Digraphs && Char == ':') {
				1186	Result.SetKind(tok::l_square); // '<:' -> '['
				1187	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1188	} else if (Features.Digraphs && Char == '>') {
				1189	Result.SetKind(tok::l_brace); // '<%' -> '{'
				1190	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1191	} else if (Features.CPPMinMax && Char == '?') { // <?
				1192	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1193	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1194
				1195	if (getCharAndSize(CurPtr, SizeTmp) == '=') { // <?=
				1196	Result.SetKind(tok::lessquestionequal);
				1197	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1198	} else {
				1199	Result.SetKind(tok::lessquestion);
				1200	}
				1201	} else {
				1202	Result.SetKind(tok::less);
				1203	}
				1204	break;
				1205	case '>':
				1206	Char = getCharAndSize(CurPtr, SizeTmp);
				1207	if (Char == '=') {
				1208	Result.SetKind(tok::greaterequal);
				1209	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1210	} else if (Char == '>' &&
				1211	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1212	Result.SetKind(tok::greatergreaterequal);
				1213	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1214	SizeTmp2, Result);
				1215	} else if (Char == '>') {
				1216	Result.SetKind(tok::greatergreater);
				1217	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1218	} else if (Features.CPPMinMax && Char == '?') {
				1219	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1220	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1221
				1222	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1223	Result.SetKind(tok::greaterquestionequal); // >?=
				1224	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1225	} else {
				1226	Result.SetKind(tok::greaterquestion); // >?
				1227	}
				1228	} else {
				1229	Result.SetKind(tok::greater);
				1230	}
				1231	break;
				1232	case '^':
				1233	Char = getCharAndSize(CurPtr, SizeTmp);
				1234	if (Char == '=') {
				1235	Result.SetKind(tok::caretequal);
				1236	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1237	} else {
				1238	Result.SetKind(tok::caret);
				1239	}
				1240	break;
				1241	case '\|':
				1242	Char = getCharAndSize(CurPtr, SizeTmp);
				1243	if (Char == '=') {
				1244	Result.SetKind(tok::pipeequal);
				1245	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1246	} else if (Char == '\|') {
				1247	Result.SetKind(tok::pipepipe);
				1248	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1249	} else {
				1250	Result.SetKind(tok::pipe);
				1251	}
				1252	break;
				1253	case ':':
				1254	Char = getCharAndSize(CurPtr, SizeTmp);
				1255	if (Features.Digraphs && Char == '>') {
				1256	Result.SetKind(tok::r_square); // ':>' -> ']'
				1257	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1258	} else if (Features.CPlusPlus && Char == ':') {
				1259	Result.SetKind(tok::coloncolon);
				1260	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1261	} else {
				1262	Result.SetKind(tok::colon);
				1263	}
				1264	break;
				1265	case ';':
				1266	Result.SetKind(tok::semi);
				1267	break;
				1268	case '=':
				1269	Char = getCharAndSize(CurPtr, SizeTmp);
				1270	if (Char == '=') {
				1271	Result.SetKind(tok::equalequal);
				1272	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1273	} else {
				1274	Result.SetKind(tok::equal);
				1275	}
				1276	break;
				1277	case ',':
				1278	Result.SetKind(tok::comma);
				1279	break;
				1280	case '#':
				1281	Char = getCharAndSize(CurPtr, SizeTmp);
				1282	if (Char == '#') {
				1283	Result.SetKind(tok::hashhash);
				1284	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1285	} else {
				1286	Result.SetKind(tok::hash);
				1287	// We parsed a # character. If this occurs at the start of the line,
				1288	// it's actually the start of a preprocessing directive. Callback to
				1289	// the preprocessor to handle it.
				1290	// FIXME: not in preprocessed mode??
				1291	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1292	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1293	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1294
				1295	// As an optimization, if the preprocessor didn't switch lexers, tail
				1296	// recurse.
				1297	if (PP.isCurrentLexer(this)) {
				1298	// Start a new token. If this is a #include or something, the PP may
				1299	// want us starting at the beginning of the line again. If so, set
				1300	// the StartOfLine flag.
				1301	if (IsAtStartOfLine) {
				1302	Result.SetFlag(LexerToken::StartOfLine);
				1303	IsAtStartOfLine = false;
				1304	}
				1305	goto LexNextToken; // GCC isn't tail call eliminating.
				1306	}
				1307	return PP.Lex(Result);
				1308	}
				1309	}
				1310	break;
				1311
				1312	case '\\':
				1313	// FIXME: handle UCN's.
				1314	// FALL THROUGH.
				1315	default:
				1316	// Objective C support.
				1317	if (CurPtr[-1] == '@' && Features.ObjC1) {
				1318	Result.SetKind(tok::at);
				1319	break;
				1320	} else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1321	Diag(CurPtr-1, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1322	return LexIdentifier(Result, CurPtr);
				1323	}
				1324
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1325	if (!PP.isSkipping()) Diag(CurPtr-1, diag::err_stray_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1326	BufferPtr = CurPtr;
				1327	goto LexNextToken; // GCC isn't tail call eliminating.
				1328	}
				1329
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1330	// Update the location of token as well as BufferPtr.
				1331	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1332	}