Blame - clang/Lex/Lexer.cpp - toolchain/llvm-project

blob: 208f25ac60dcaa3225e1b40ea9017ccdc8cab2ae [file] [log] [blame]

Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1	//===--- Lexer.cpp - C Language Family Lexer ------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the Lexer and LexerToken interfaces.
				11	//
				12	//===----------------------------------------------------------------------===//
				13	//
				14	// TODO: GCC Diagnostics emitted by the lexer:
				15	// PEDWARN: (form feed\|vertical tab) in preprocessing directive
				16	//
				17	// Universal characters, unicode, char mapping:
				18	// WARNING: `%.*s' is not in NFKC
				19	// WARNING: `%.*s' is not in NFC
				20	//
				21	// Other:
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	22	// TODO: Options to support:
				23	// -fexec-charset,-fwide-exec-charset
				24	//
				25	//===----------------------------------------------------------------------===//
				26
				27	#include "clang/Lex/Lexer.h"
				28	#include "clang/Lex/Preprocessor.h"
				29	#include "clang/Basic/Diagnostic.h"
				30	#include "clang/Basic/SourceBuffer.h"
				31	#include "clang/Basic/SourceLocation.h"
				32	#include "llvm/Config/alloca.h"
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	33	#include <cctype>
				34	#include <iostream>
				35	using namespace llvm;
				36	using namespace clang;
				37
				38	static void InitCharacterInfo();
				39
Chris Lattner	4cca5ba	2006-07-02 20:05:54 +0000	[diff] [blame]	40	Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp,
				41	const char BufStart, const char BufEnd)
				42	: BufferPtr(BufStart ? BufStart : File->getBufferStart()),
Chris Lattner	4cca5ba	2006-07-02 20:05:54 +0000	[diff] [blame]	43	BufferEnd(BufEnd ? BufEnd : File->getBufferEnd()),
				44	InputFile(File), CurFileID(fileid), PP(pp), Features(PP.getLangOptions()) {
Chris Lattner	ecfeafe	2006-07-02 21:26:45 +0000	[diff] [blame]	45	Is_PragmaLexer = false;
Chris Lattner	4ec473f	2006-07-03 05:16:05 +0000	[diff] [blame^]	46	IsMainFile = false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	47	InitCharacterInfo();
				48
				49	assert(BufferEnd[0] == 0 &&
				50	"We assume that the input buffer has a null character at the end"
				51	" to simplify lexing!");
				52
				53	// Start of the file is a start of line.
				54	IsAtStartOfLine = true;
				55
				56	// We are not after parsing a #.
				57	ParsingPreprocessorDirective = false;
				58
				59	// We are not after parsing #include.
				60	ParsingFilename = false;
				61	}
				62
Chris Lattner	e3e81ea	2006-07-03 01:13:26 +0000	[diff] [blame]	63	/// Stringify - Convert the specified string into a C string, with surrounding
				64	/// ""'s, and with escaped \ and " characters.
				65	std::string Lexer::Stringify(const std::string &Str) {
				66	std::string Result = Str;
				67	for (unsigned i = 0, e = Result.size(); i != e; ++i) {
				68	if (Result[i] == '\\' \|\| Result[i] == '"') {
				69	Result.insert(Result.begin()+i, '\\');
				70	++i; ++e;
				71	}
				72	}
				73
				74	// Add quotes.
				75	return '"' + Result + '"';
				76	}
				77
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	78
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	79	//===----------------------------------------------------------------------===//
				80	// Character information.
				81	//===----------------------------------------------------------------------===//
				82
				83	static unsigned char CharInfo[256];
				84
				85	enum {
				86	CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0'
				87	CHAR_VERT_WS = 0x02, // '\r', '\n'
				88	CHAR_LETTER = 0x04, // a-z,A-Z
				89	CHAR_NUMBER = 0x08, // 0-9
				90	CHAR_UNDER = 0x10, // _
				91	CHAR_PERIOD = 0x20 // .
				92	};
				93
				94	static void InitCharacterInfo() {
				95	static bool isInited = false;
				96	if (isInited) return;
				97	isInited = true;
				98
				99	// Intiialize the CharInfo table.
				100	// TODO: statically initialize this.
				101	CharInfo[(int)' '] = CharInfo[(int)'\t'] =
				102	CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS;
				103	CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS;
				104
				105	CharInfo[(int)'_'] = CHAR_UNDER;
				106	for (unsigned i = 'a'; i <= 'z'; ++i)
				107	CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER;
				108	for (unsigned i = '0'; i <= '9'; ++i)
				109	CharInfo[i] = CHAR_NUMBER;
				110	}
				111
				112	/// isIdentifierBody - Return true if this is the body character of an
				113	/// identifier, which is [a-zA-Z0-9_].
				114	static inline bool isIdentifierBody(unsigned char c) {
				115	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER);
				116	}
				117
				118	/// isHorizontalWhitespace - Return true if this character is horizontal
				119	/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'.
				120	static inline bool isHorizontalWhitespace(unsigned char c) {
				121	return CharInfo[c] & CHAR_HORZ_WS;
				122	}
				123
				124	/// isWhitespace - Return true if this character is horizontal or vertical
				125	/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false
				126	/// for '\0'.
				127	static inline bool isWhitespace(unsigned char c) {
				128	return CharInfo[c] & (CHAR_HORZ_WS\|CHAR_VERT_WS);
				129	}
				130
				131	/// isNumberBody - Return true if this is the body character of an
				132	/// preprocessing number, which is [a-zA-Z0-9_.].
				133	static inline bool isNumberBody(unsigned char c) {
				134	return CharInfo[c] & (CHAR_LETTER\|CHAR_NUMBER\|CHAR_UNDER\|CHAR_PERIOD);
				135	}
				136
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	137
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	138	//===----------------------------------------------------------------------===//
				139	// Diagnostics forwarding code.
				140	//===----------------------------------------------------------------------===//
				141
				142	/// getSourceLocation - Return a source location identifier for the specified
				143	/// offset in the current file.
				144	SourceLocation Lexer::getSourceLocation(const char *Loc) const {
Chris Lattner	8bbfe46	2006-07-02 22:27:49 +0000	[diff] [blame]	145	assert(Loc >= InputFile->getBufferStart() && Loc <= BufferEnd &&
Chris Lattner	4cca5ba	2006-07-02 20:05:54 +0000	[diff] [blame]	146	"Location out of range for this buffer!");
Chris Lattner	8bbfe46	2006-07-02 22:27:49 +0000	[diff] [blame]	147	return SourceLocation(CurFileID, Loc-InputFile->getBufferStart());
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	148	}
				149
				150
				151	/// Diag - Forwarding function for diagnostics. This translate a source
				152	/// position in the current buffer into a SourceLocation object for rendering.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	153	void Lexer::Diag(const char *Loc, unsigned DiagID,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	154	const std::string &Msg) const {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	155	PP.Diag(getSourceLocation(Loc), DiagID, Msg);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	156	}
				157
				158	//===----------------------------------------------------------------------===//
				159	// Trigraph and Escaped Newline Handling Code.
				160	//===----------------------------------------------------------------------===//
				161
				162	/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair,
				163	/// return the decoded trigraph letter it corresponds to, or '\0' if nothing.
				164	static char GetTrigraphCharForLetter(char Letter) {
				165	switch (Letter) {
				166	default: return 0;
				167	case '=': return '#';
				168	case ')': return ']';
				169	case '(': return '[';
				170	case '!': return '\|';
				171	case '\'': return '^';
				172	case '>': return '}';
				173	case '/': return '\\';
				174	case '<': return '{';
				175	case '-': return '~';
				176	}
				177	}
				178
				179	/// DecodeTrigraphChar - If the specified character is a legal trigraph when
				180	/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled,
				181	/// return the result character. Finally, emit a warning about trigraph use
				182	/// whether trigraphs are enabled or not.
				183	static char DecodeTrigraphChar(const char CP, Lexer L) {
				184	char Res = GetTrigraphCharForLetter(*CP);
				185	if (Res && L) {
				186	if (!L->getFeatures().Trigraphs) {
				187	L->Diag(CP-2, diag::trigraph_ignored);
				188	return 0;
				189	} else {
				190	L->Diag(CP-2, diag::trigraph_converted, std::string()+Res);
				191	}
				192	}
				193	return Res;
				194	}
				195
				196	/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,
				197	/// get its size, and return it. This is tricky in several cases:
				198	/// 1. If currently at the start of a trigraph, we warn about the trigraph,
				199	/// then either return the trigraph (skipping 3 chars) or the '?',
				200	/// depending on whether trigraphs are enabled or not.
				201	/// 2. If this is an escaped newline (potentially with whitespace between
				202	/// the backslash and newline), implicitly skip the newline and return
				203	/// the char after it.
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	204	/// 3. If this is a UCN, return it. FIXME: C++ UCN's?
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	205	///
				206	/// This handles the slow/uncommon case of the getCharAndSize method. Here we
				207	/// know that we can accumulate into Size, and that we have already incremented
				208	/// Ptr by Size bytes.
				209	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	210	/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
				211	/// be updated to match.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	212	///
				213	char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
				214	LexerToken *Tok) {
				215	// If we have a slash, look for an escaped newline.
				216	if (Ptr[0] == '\\') {
				217	++Size;
				218	++Ptr;
				219	Slash:
				220	// Common case, backslash-char where the char is not whitespace.
				221	if (!isWhitespace(Ptr[0])) return '\\';
				222
				223	// See if we have optional whitespace characters followed by a newline.
				224	{
				225	unsigned SizeTmp = 0;
				226	do {
				227	++SizeTmp;
				228	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				229	// Remember that this token needs to be cleaned.
				230	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				231
				232	// Warn if there was whitespace between the backslash and newline.
				233	if (SizeTmp != 1 && Tok)
				234	Diag(Ptr, diag::backslash_newline_space);
				235
				236	// If this is a \r\n or \n\r, skip the newlines.
				237	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				238	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				239	++SizeTmp;
				240
				241	// Found backslash<whitespace><newline>. Parse the char after it.
				242	Size += SizeTmp;
				243	Ptr += SizeTmp;
				244	// Use slow version to accumulate a correct size field.
				245	return getCharAndSizeSlow(Ptr, Size, Tok);
				246	}
				247	} while (isWhitespace(Ptr[SizeTmp]));
				248	}
				249
				250	// Otherwise, this is not an escaped newline, just return the slash.
				251	return '\\';
				252	}
				253
				254	// If this is a trigraph, process it.
				255	if (Ptr[0] == '?' && Ptr[1] == '?') {
				256	// If this is actually a legal trigraph (not something like "??x"), emit
				257	// a trigraph warning. If so, and if trigraphs are enabled, return it.
				258	if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) {
				259	// Remember that this token needs to be cleaned.
				260	if (Tok) Tok->SetFlag(LexerToken::NeedsCleaning);
				261
				262	Ptr += 3;
				263	Size += 3;
				264	if (C == '\\') goto Slash;
				265	return C;
				266	}
				267	}
				268
				269	// If this is neither, return a single character.
				270	++Size;
				271	return *Ptr;
				272	}
				273
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	274
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	275	/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
				276	/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
				277	/// and that we have already incremented Ptr by Size bytes.
				278	///
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	279	/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
				280	/// be updated to match.
				281	char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	282	const LangOptions &Features) {
				283	// If we have a slash, look for an escaped newline.
				284	if (Ptr[0] == '\\') {
				285	++Size;
				286	++Ptr;
				287	Slash:
				288	// Common case, backslash-char where the char is not whitespace.
				289	if (!isWhitespace(Ptr[0])) return '\\';
				290
				291	// See if we have optional whitespace characters followed by a newline.
				292	{
				293	unsigned SizeTmp = 0;
				294	do {
				295	++SizeTmp;
				296	if (Ptr[SizeTmp-1] == '\n' \|\| Ptr[SizeTmp-1] == '\r') {
				297
				298	// If this is a \r\n or \n\r, skip the newlines.
				299	if ((Ptr[SizeTmp] == '\r' \|\| Ptr[SizeTmp] == '\n') &&
				300	Ptr[SizeTmp-1] != Ptr[SizeTmp])
				301	++SizeTmp;
				302
				303	// Found backslash<whitespace><newline>. Parse the char after it.
				304	Size += SizeTmp;
				305	Ptr += SizeTmp;
				306
				307	// Use slow version to accumulate a correct size field.
				308	return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
				309	}
				310	} while (isWhitespace(Ptr[SizeTmp]));
				311	}
				312
				313	// Otherwise, this is not an escaped newline, just return the slash.
				314	return '\\';
				315	}
				316
				317	// If this is a trigraph, process it.
				318	if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') {
				319	// If this is actually a legal trigraph (not something like "??x"), return
				320	// it.
				321	if (char C = GetTrigraphCharForLetter(Ptr[2])) {
				322	Ptr += 3;
				323	Size += 3;
				324	if (C == '\\') goto Slash;
				325	return C;
				326	}
				327	}
				328
				329	// If this is neither, return a single character.
				330	++Size;
				331	return *Ptr;
				332	}
				333
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	334	//===----------------------------------------------------------------------===//
				335	// Helper methods for lexing.
				336	//===----------------------------------------------------------------------===//
				337
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	338	void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	339	// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
				340	unsigned Size;
				341	unsigned char C = *CurPtr++;
				342	while (isIdentifierBody(C)) {
				343	C = *CurPtr++;
				344	}
				345	--CurPtr; // Back up over the skipped character.
				346
				347	// Fast path, no $,\,? in identifier found. '\' might be an escaped newline
				348	// or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	349	// FIXME: UCNs.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	350	if (C != '\\' && C != '?' && (C != '$' \|\| !Features.DollarIdents)) {
				351	FinishIdentifier:
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	352	const char IdStart = BufferPtr, IdEnd = CurPtr;
				353	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	354	Result.SetKind(tok::identifier);
				355
				356	// Look up this token, see if it is a macro, or if it is a language keyword.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	357	IdentifierTokenInfo *II;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	358	if (!Result.needsCleaning()) {
				359	// No cleaning needed, just use the characters from the lexed buffer.
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	360	II = PP.getIdentifierInfo(IdStart, IdEnd);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	361	} else {
				362	// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
Chris Lattner	33ce728	2006-06-18 07:35:33 +0000	[diff] [blame]	363	char TmpBuf = (char)alloca(Result.getLength());
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	364	unsigned Size = PP.getSpelling(Result, TmpBuf);
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	365	II = PP.getIdentifierInfo(TmpBuf, TmpBuf+Size);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	366	}
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	367	Result.SetIdentifierInfo(II);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	368
Chris Lattner	c5a0006	2006-06-18 16:41:01 +0000	[diff] [blame]	369	// Finally, now that we know we have an identifier, pass this off to the
				370	// preprocessor, which may macro expand it or something.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	371	return PP.HandleIdentifier(Result);
				372	}
				373
				374	// Otherwise, $,\,? in identifier found. Enter slower path.
				375
				376	C = getCharAndSize(CurPtr, Size);
				377	while (1) {
				378	if (C == '$') {
				379	// If we hit a $ and they are not supported in identifiers, we are done.
				380	if (!Features.DollarIdents) goto FinishIdentifier;
				381
				382	// Otherwise, emit a diagnostic and continue.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	383	Diag(CurPtr, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	384	CurPtr = ConsumeChar(CurPtr, Size, Result);
				385	C = getCharAndSize(CurPtr, Size);
				386	continue;
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	387	} else if (!isIdentifierBody(C)) { // FIXME: UCNs.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	388	// Found end of identifier.
				389	goto FinishIdentifier;
				390	}
				391
				392	// Otherwise, this character is good, consume it.
				393	CurPtr = ConsumeChar(CurPtr, Size, Result);
				394
				395	C = getCharAndSize(CurPtr, Size);
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	396	while (isIdentifierBody(C)) { // FIXME: UCNs.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	397	CurPtr = ConsumeChar(CurPtr, Size, Result);
				398	C = getCharAndSize(CurPtr, Size);
				399	}
				400	}
				401	}
				402
				403
				404	/// LexNumericConstant - Lex the remainer of a integer or floating point
				405	/// constant. From[-1] is the first character lexed. Return the end of the
				406	/// constant.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	407	void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	408	unsigned Size;
				409	char C = getCharAndSize(CurPtr, Size);
				410	char PrevCh = 0;
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	411	while (isNumberBody(C)) { // FIXME: UCNs?
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	412	CurPtr = ConsumeChar(CurPtr, Size, Result);
				413	PrevCh = C;
				414	C = getCharAndSize(CurPtr, Size);
				415	}
				416
				417	// If we fell out, check for a sign, due to 1e+12. If we have one, continue.
				418	if ((C == '-' \|\| C == '+') && (PrevCh == 'E' \|\| PrevCh == 'e'))
				419	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				420
				421	// If we have a hex FP constant, continue.
				422	if (Features.HexFloats &&
				423	(C == '-' \|\| C == '+') && (PrevCh == 'P' \|\| PrevCh == 'p'))
				424	return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
				425
				426	Result.SetKind(tok::numeric_constant);
				427
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	428	// Update the location of token as well as BufferPtr.
				429	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	430	}
				431
				432	/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
				433	/// either " or L".
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	434	void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	435	const char *NulCharacter = 0; // Does this string contain the \0 character?
				436
				437	char C = getAndAdvanceChar(CurPtr, Result);
				438	while (C != '"') {
				439	// Skip escaped characters.
				440	if (C == '\\') {
				441	// Skip the escaped character.
				442	C = getAndAdvanceChar(CurPtr, Result);
				443	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				444	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	445	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	446	BufferPtr = CurPtr-1;
				447	return LexTokenInternal(Result);
				448	} else if (C == 0) {
				449	NulCharacter = CurPtr-1;
				450	}
				451	C = getAndAdvanceChar(CurPtr, Result);
				452	}
				453
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	454	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	455
				456	Result.SetKind(tok::string_literal);
				457
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	458	// Update the location of the token as well as the BufferPtr instance var.
				459	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	460	}
				461
				462	/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
				463	/// after having lexed the '<' character. This is used for #include filenames.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	464	void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	465	const char *NulCharacter = 0; // Does this string contain the \0 character?
				466
				467	char C = getAndAdvanceChar(CurPtr, Result);
				468	while (C != '>') {
				469	// Skip escaped characters.
				470	if (C == '\\') {
				471	// Skip the escaped character.
				472	C = getAndAdvanceChar(CurPtr, Result);
				473	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				474	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	475	Diag(BufferPtr, diag::err_unterminated_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	476	BufferPtr = CurPtr-1;
				477	return LexTokenInternal(Result);
				478	} else if (C == 0) {
				479	NulCharacter = CurPtr-1;
				480	}
				481	C = getAndAdvanceChar(CurPtr, Result);
				482	}
				483
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	484	if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	485
				486	Result.SetKind(tok::angle_string_literal);
				487
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	488	// Update the location of token as well as BufferPtr.
				489	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	490	}
				491
				492
				493	/// LexCharConstant - Lex the remainder of a character constant, after having
				494	/// lexed either ' or L'.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	495	void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	496	const char *NulCharacter = 0; // Does this character contain the \0 character?
				497
				498	// Handle the common case of 'x' and '\y' efficiently.
				499	char C = getAndAdvanceChar(CurPtr, Result);
				500	if (C == '\'') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	501	Diag(BufferPtr, diag::err_empty_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	502	BufferPtr = CurPtr;
				503	return LexTokenInternal(Result);
				504	} else if (C == '\\') {
				505	// Skip the escaped character.
				506	// FIXME: UCN's.
				507	C = getAndAdvanceChar(CurPtr, Result);
				508	}
				509
				510	if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') {
				511	++CurPtr;
				512	} else {
				513	// Fall back on generic code for embedded nulls, newlines, wide chars.
				514	do {
				515	// Skip escaped characters.
				516	if (C == '\\') {
				517	// Skip the escaped character.
				518	C = getAndAdvanceChar(CurPtr, Result);
				519	} else if (C == '\n' \|\| C == '\r' \|\| // Newline.
				520	(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	521	Diag(BufferPtr, diag::err_unterminated_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	522	BufferPtr = CurPtr-1;
				523	return LexTokenInternal(Result);
				524	} else if (C == 0) {
				525	NulCharacter = CurPtr-1;
				526	}
				527	C = getAndAdvanceChar(CurPtr, Result);
				528	} while (C != '\'');
				529	}
				530
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	531	if (NulCharacter) Diag(NulCharacter, diag::null_in_char);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	532
				533	Result.SetKind(tok::char_constant);
				534
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	535	// Update the location of token as well as BufferPtr.
				536	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	537	}
				538
				539	/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
				540	/// Update BufferPtr to point to the next non-whitespace character and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	541	void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	542	// Whitespace - Skip it, then return the token after the whitespace.
				543	unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently.
				544	while (1) {
				545	// Skip horizontal whitespace very aggressively.
				546	while (isHorizontalWhitespace(Char))
				547	Char = *++CurPtr;
				548
				549	// Otherwise if we something other than whitespace, we're done.
				550	if (Char != '\n' && Char != '\r')
				551	break;
				552
				553	if (ParsingPreprocessorDirective) {
				554	// End of preprocessor directive line, let LexTokenInternal handle this.
				555	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	556	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	557	}
				558
				559	// ok, but handle newline.
				560	// The returned token is at the start of the line.
				561	Result.SetFlag(LexerToken::StartOfLine);
				562	// No leading whitespace seen so far.
				563	Result.ClearFlag(LexerToken::LeadingSpace);
				564	Char = *++CurPtr;
				565	}
				566
				567	// If this isn't immediately after a newline, there is leading space.
				568	char PrevChar = CurPtr[-1];
				569	if (PrevChar != '\n' && PrevChar != '\r')
				570	Result.SetFlag(LexerToken::LeadingSpace);
				571
				572	// If the next token is obviously a // or /* */ comment, skip it efficiently
				573	// too (without going through the big switch stmt).
				574	if (Char == '/' && CurPtr[1] == '/') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	575	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	576	return SkipBCPLComment(Result, CurPtr+1);
				577	}
				578	if (Char == '/' && CurPtr[1] == '*') {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	579	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	580	return SkipBlockComment(Result, CurPtr+2);
				581	}
				582	BufferPtr = CurPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	583	}
				584
				585	// SkipBCPLComment - We have just read the // characters from input. Skip until
				586	// we find the newline character thats terminate the comment. Then update
				587	/// BufferPtr and return.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	588	void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	589	// If BCPL comments aren't explicitly enabled for this language, emit an
				590	// extension warning.
				591	if (!Features.BCPLComment) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	592	Diag(BufferPtr, diag::ext_bcpl_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	593
				594	// Mark them enabled so we only emit one warning for this translation
				595	// unit.
				596	Features.BCPLComment = true;
				597	}
				598
				599	// Scan over the body of the comment. The common case, when scanning, is that
				600	// the comment contains normal ascii characters with nothing interesting in
				601	// them. As such, optimize for this case with the inner loop.
				602	char C;
				603	do {
				604	C = *CurPtr;
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	605	// FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.
				606	// If we find a \n character, scan backwards, checking to see if it's an
				607	// escaped newline, like we do for block comments.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	608
				609	// Skip over characters in the fast loop.
				610	while (C != 0 && // Potentially EOF.
				611	C != '\\' && // Potentially escaped newline.
				612	C != '?' && // Potentially trigraph.
				613	C != '\n' && C != '\r') // Newline or DOS-style newline.
				614	C = *++CurPtr;
				615
				616	// If this is a newline, we're done.
				617	if (C == '\n' \|\| C == '\r')
				618	break; // Found the newline? Break out!
				619
				620	// Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
				621	// properly decode the character.
				622	const char *OldPtr = CurPtr;
				623	C = getAndAdvanceChar(CurPtr, Result);
				624
				625	// If we read multiple characters, and one of those characters was a \r or
				626	// \n, then we had an escaped newline within the comment. Emit diagnostic.
				627	if (CurPtr != OldPtr+1) {
				628	for (; OldPtr != CurPtr; ++OldPtr)
				629	if (OldPtr[0] == '\n' \|\| OldPtr[0] == '\r') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	630	Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
				631	break;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	632	}
				633	}
				634
				635	if (CurPtr == BufferEnd+1) goto FoundEOF;
				636	} while (C != '\n' && C != '\r');
				637
				638	// Found and did not consume a newline.
				639
				640	// If we are inside a preprocessor directive and we see the end of line,
				641	// return immediately, so that the lexer can return this as an EOM token.
				642	if (ParsingPreprocessorDirective) {
				643	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	644	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	645	}
				646
				647	// Otherwise, eat the \n character. We don't care if this is a \n\r or
				648	// \r\n sequence.
				649	++CurPtr;
				650
				651	// The next returned token is at the start of the line.
				652	Result.SetFlag(LexerToken::StartOfLine);
				653	// No leading whitespace seen so far.
				654	Result.ClearFlag(LexerToken::LeadingSpace);
				655
				656	// It is common for the tokens immediately after a // comment to be
				657	// whitespace (indentation for the next line). Instead of going through the
				658	// big switch, handle it efficiently now.
				659	if (isWhitespace(*CurPtr)) {
				660	Result.SetFlag(LexerToken::LeadingSpace);
				661	return SkipWhitespace(Result, CurPtr+1);
				662	}
				663
				664	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	665	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	666
				667	FoundEOF: // If we ran off the end of the buffer, return EOF.
				668	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	669	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	670	}
				671
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	672	/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
				673	/// character (either \n or \r) is part of an escaped newline sequence. Issue a
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	674	/// diagnostic if so. We know that the is inside of a block comment.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	675	static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
				676	Lexer *L) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	677	assert(CurPtr[0] == '\n' \|\| CurPtr[0] == '\r');
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	678
				679	// Back up off the newline.
				680	--CurPtr;
				681
				682	// If this is a two-character newline sequence, skip the other character.
				683	if (CurPtr[0] == '\n' \|\| CurPtr[0] == '\r') {
				684	// \n\n or \r\r -> not escaped newline.
				685	if (CurPtr[0] == CurPtr[1])
				686	return false;
				687	// \n\r or \r\n -> skip the newline.
				688	--CurPtr;
				689	}
				690
				691	// If we have horizontal whitespace, skip over it. We allow whitespace
				692	// between the slash and newline.
				693	bool HasSpace = false;
				694	while (isHorizontalWhitespace(CurPtr) \|\| CurPtr == 0) {
				695	--CurPtr;
				696	HasSpace = true;
				697	}
				698
				699	// If we have a slash, we know this is an escaped newline.
				700	if (*CurPtr == '\\') {
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	701	if (CurPtr[-1] != '*') return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	702	} else {
				703	// It isn't a slash, is it the ?? / trigraph?
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	704	if (CurPtr[0] != '/' \|\| CurPtr[-1] != '?' \|\| CurPtr[-2] != '?' \|\|
				705	CurPtr[-3] != '*')
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	706	return false;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	707
				708	// This is the trigraph ending the comment. Emit a stern warning!
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	709	CurPtr -= 2;
				710
				711	// If no trigraphs are enabled, warn that we ignored this trigraph and
				712	// ignore this * character.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	713	if (!L->getFeatures().Trigraphs) {
				714	L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	715	return false;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	716	}
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	717	L->Diag(CurPtr, diag::trigraph_ends_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	718	}
				719
				720	// Warn about having an escaped newline between the */ characters.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	721	L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	722
				723	// If there was space between the backslash and newline, warn about it.
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	724	if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	725
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	726	return true;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	727	}
				728
				729	/// SkipBlockComment - We have just read the /* characters from input. Read
				730	/// until we find the */ characters that terminate the comment. Note that we
				731	/// don't bother decoding trigraphs or escaped newlines in block comments,
				732	/// because they cannot cause the comment to end. The only thing that can
				733	/// happen is the comment could end with an escaped newline between the */ end
				734	/// of comment.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	735	void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	736	// Scan one character past where we should, looking for a '/' character. Once
				737	// we find it, check to see if it was preceeded by a *. This common
				738	// optimization helps people who like to put a lot of * characters in their
				739	// comments.
				740	unsigned char C = *CurPtr++;
				741	if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	742	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	743	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	744	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	745	}
				746
				747	while (1) {
				748	// Skip over all non-interesting characters.
				749	// TODO: Vectorize this. Note: memchr on Darwin is slower than this loop.
				750	while (C != '/' && C != '\0')
				751	C = *CurPtr++;
				752
				753	if (C == '/') {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	754	if (CurPtr[-2] == '') // We found the final /. We're done!
				755	break;
				756
				757	if ((CurPtr[-2] == '\n' \|\| CurPtr[-2] == '\r')) {
Chris Lattner	1f58305	2006-06-18 06:53:56 +0000	[diff] [blame]	758	if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	759	// We found the final */, though it had an escaped newline between the
				760	// * and /. We're done!
				761	break;
				762	}
				763	}
				764	if (CurPtr[0] == '*' && CurPtr[1] != '/') {
				765	// If this is a /* inside of the comment, emit a warning. Don't do this
				766	// if this is a /*/, which will end the comment. This misses cases with
				767	// embedded escaped newlines, but oh well.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	768	Diag(CurPtr-1, diag::nested_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	769	}
				770	} else if (C == 0 && CurPtr == BufferEnd+1) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	771	Diag(BufferPtr, diag::err_unterminated_block_comment);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	772	// Note: the user probably forgot a */. We could continue immediately
				773	// after the /*, but this would involve lexing a lot of what really is the
				774	// comment, which surely would confuse the parser.
				775	BufferPtr = CurPtr-1;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	776	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	777	}
				778	C = *CurPtr++;
				779	}
				780
				781	// It is common for the tokens immediately after a /**/ comment to be
				782	// whitespace. Instead of going through the big switch, handle it
				783	// efficiently now.
				784	if (isHorizontalWhitespace(*CurPtr)) {
				785	Result.SetFlag(LexerToken::LeadingSpace);
				786	return SkipWhitespace(Result, CurPtr+1);
				787	}
				788
				789	// Otherwise, just return so that the next character will be lexed as a token.
				790	BufferPtr = CurPtr;
				791	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	792	}
				793
				794	//===----------------------------------------------------------------------===//
				795	// Primary Lexing Entry Points
				796	//===----------------------------------------------------------------------===//
				797
				798	/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
				799	/// (potentially) macro expand the filename.
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame]	800	std::string Lexer::LexIncludeFilename(LexerToken &FilenameTok) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	801	assert(ParsingPreprocessorDirective &&
				802	ParsingFilename == false &&
				803	"Must be in a preprocessing directive!");
				804
				805	// We are now parsing a filename!
				806	ParsingFilename = true;
				807
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame]	808	// Lex the filename.
				809	Lex(FilenameTok);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	810
				811	// We should have gotten the filename now.
				812	ParsingFilename = false;
				813
				814	// No filename?
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame]	815	if (FilenameTok.getKind() == tok::eom) {
				816	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				817	return "";
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	818	}
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	819
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame]	820	// Get the text form of the filename.
				821	std::string Filename = PP.getSpelling(FilenameTok);
				822	assert(!Filename.empty() && "Can't have tokens with empty spellings!");
				823
				824	// Make sure the filename is <x> or "x".
				825	if (Filename[0] == '<') {
				826	if (Filename[Filename.size()-1] != '>') {
				827	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				828	FilenameTok.SetKind(tok::eom);
				829	return "";
				830	}
				831	} else if (Filename[0] == '"') {
				832	if (Filename[Filename.size()-1] != '"') {
				833	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				834	FilenameTok.SetKind(tok::eom);
				835	return "";
				836	}
				837	} else {
				838	PP.Diag(FilenameTok, diag::err_pp_expects_filename);
				839	FilenameTok.SetKind(tok::eom);
				840	return "";
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	841	}
Chris Lattner	269c232	2006-06-25 06:23:00 +0000	[diff] [blame]	842
				843	// Diagnose #include "" as invalid.
				844	if (Filename.size() == 2) {
				845	PP.Diag(FilenameTok, diag::err_pp_empty_filename);
				846	FilenameTok.SetKind(tok::eom);
				847	return "";
				848	}
				849
				850	return Filename;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	851	}
				852
				853	/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
				854	/// uninterpreted string. This switches the lexer out of directive mode.
				855	std::string Lexer::ReadToEndOfLine() {
				856	assert(ParsingPreprocessorDirective && ParsingFilename == false &&
				857	"Must be in a preprocessing directive!");
				858	std::string Result;
				859	LexerToken Tmp;
				860
				861	// CurPtr - Cache BufferPtr in an automatic variable.
				862	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	863	while (1) {
				864	char Char = getAndAdvanceChar(CurPtr, Tmp);
				865	switch (Char) {
				866	default:
				867	Result += Char;
				868	break;
				869	case 0: // Null.
				870	// Found end of file?
				871	if (CurPtr-1 != BufferEnd) {
				872	// Nope, normal character, continue.
				873	Result += Char;
				874	break;
				875	}
				876	// FALL THROUGH.
				877	case '\r':
				878	case '\n':
				879	// Okay, we found the end of the line. First, back up past the \0, \r, \n.
				880	assert(CurPtr[-1] == Char && "Trigraphs for newline?");
				881	BufferPtr = CurPtr-1;
				882
				883	// Next, lex the character, which should handle the EOM transition.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	884	Lex(Tmp);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	885	assert(Tmp.getKind() == tok::eom && "Unexpected token!");
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	886
				887	// Finally, we're done, return the string we found.
				888	return Result;
				889	}
				890	}
				891	}
				892
				893	/// LexEndOfFile - CurPtr points to the end of this file. Handle this
				894	/// condition, reporting diagnostics and handling other edge cases as required.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	895	void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	896	// If we hit the end of the file while parsing a preprocessor directive,
				897	// end the preprocessor directive first. The next token returned will
				898	// then be the end of file.
				899	if (ParsingPreprocessorDirective) {
				900	// Done parsing the "line".
				901	ParsingPreprocessorDirective = false;
				902	Result.SetKind(tok::eom);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	903	// Update the location of token as well as BufferPtr.
				904	FormTokenWithChars(Result, CurPtr);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	905	return;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	906	}
				907
				908	// If we are in a #if directive, emit an error.
				909	while (!ConditionalStack.empty()) {
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	910	PP.Diag(ConditionalStack.back().IfLoc,
				911	diag::err_pp_unterminated_conditional);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	912	ConditionalStack.pop_back();
				913	}
				914
				915	// If the file was empty or didn't end in a newline, issue a pedwarn.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	916	if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
				917	Diag(BufferEnd, diag::ext_no_newline_eof);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	918
				919	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	920	PP.HandleEndOfFile(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	921	}
				922
				923
				924	/// LexTokenInternal - This implements a simple C family lexer. It is an
				925	/// extremely performance critical piece of code. This assumes that the buffer
				926	/// has a null character at the end of the file. Return true if an error
				927	/// occurred and compilation should terminate, false if normal. This returns a
				928	/// preprocessing token, not a normal token, as such, it is an internal
				929	/// interface. It assumes that the Flags of result have been cleared before
				930	/// calling this.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	931	void Lexer::LexTokenInternal(LexerToken &Result) {
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	932	LexNextToken:
				933	// New token, can't need cleaning yet.
				934	Result.ClearFlag(LexerToken::NeedsCleaning);
				935
				936	// CurPtr - Cache BufferPtr in an automatic variable.
				937	const char *CurPtr = BufferPtr;
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	938
				939	unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
				940
				941	// Read a character, advancing over it.
				942	char Char = getAndAdvanceChar(CurPtr, Result);
				943	switch (Char) {
				944	case 0: // Null.
				945	// Found end of file?
				946	if (CurPtr-1 == BufferEnd)
				947	return LexEndOfFile(Result, CurPtr-1); // Retreat back into the file.
				948
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	949	Diag(CurPtr-1, diag::null_in_file);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	950	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	951	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	952	goto LexNextToken; // GCC isn't tail call eliminating.
				953	case '\n':
				954	case '\r':
				955	// If we are inside a preprocessor directive and we see the end of line,
				956	// we know we are done with the directive, so return an EOM token.
				957	if (ParsingPreprocessorDirective) {
				958	// Done parsing the "line".
				959	ParsingPreprocessorDirective = false;
				960
				961	// Since we consumed a newline, we are back at the start of a line.
				962	IsAtStartOfLine = true;
				963
				964	Result.SetKind(tok::eom);
				965	break;
				966	}
				967	// The returned token is at the start of the line.
				968	Result.SetFlag(LexerToken::StartOfLine);
				969	// No leading whitespace seen so far.
				970	Result.ClearFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	971	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	972	goto LexNextToken; // GCC isn't tail call eliminating.
				973	case ' ':
				974	case '\t':
				975	case '\f':
				976	case '\v':
				977	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	978	SkipWhitespace(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	979	goto LexNextToken; // GCC isn't tail call eliminating.
				980
				981	case 'L':
				982	Char = getCharAndSize(CurPtr, SizeTmp);
				983
				984	// Wide string literal.
				985	if (Char == '"')
				986	return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				987
				988	// Wide character constant.
				989	if (Char == '\'')
				990	return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				991	// FALL THROUGH, treating L like the start of an identifier.
				992
				993	// C99 6.4.2: Identifiers.
				994	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
				995	case 'H': case 'I': case 'J': case 'K': /'L'/case 'M': case 'N':
				996	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
				997	case 'V': case 'W': case 'X': case 'Y': case 'Z':
				998	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
				999	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
				1000	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
				1001	case 'v': case 'w': case 'x': case 'y': case 'z':
				1002	case '_':
				1003	return LexIdentifier(Result, CurPtr);
				1004
				1005	// C99 6.4.4.1: Integer Constants.
				1006	// C99 6.4.4.2: Floating Constants.
				1007	case '0': case '1': case '2': case '3': case '4':
				1008	case '5': case '6': case '7': case '8': case '9':
				1009	return LexNumericConstant(Result, CurPtr);
				1010
				1011	// C99 6.4.4: Character Constants.
				1012	case '\'':
				1013	return LexCharConstant(Result, CurPtr);
				1014
				1015	// C99 6.4.5: String Literals.
				1016	case '"':
				1017	return LexStringLiteral(Result, CurPtr);
				1018
				1019	// C99 6.4.6: Punctuators.
				1020	case '?':
				1021	Result.SetKind(tok::question);
				1022	break;
				1023	case '[':
				1024	Result.SetKind(tok::l_square);
				1025	break;
				1026	case ']':
				1027	Result.SetKind(tok::r_square);
				1028	break;
				1029	case '(':
				1030	Result.SetKind(tok::l_paren);
				1031	break;
				1032	case ')':
				1033	Result.SetKind(tok::r_paren);
				1034	break;
				1035	case '{':
				1036	Result.SetKind(tok::l_brace);
				1037	break;
				1038	case '}':
				1039	Result.SetKind(tok::r_brace);
				1040	break;
				1041	case '.':
				1042	Char = getCharAndSize(CurPtr, SizeTmp);
				1043	if (Char >= '0' && Char <= '9') {
				1044	return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
				1045	} else if (Features.CPlusPlus && Char == '*') {
				1046	Result.SetKind(tok::periodstar);
				1047	CurPtr += SizeTmp;
				1048	} else if (Char == '.' &&
				1049	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') {
				1050	Result.SetKind(tok::ellipsis);
				1051	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1052	SizeTmp2, Result);
				1053	} else {
				1054	Result.SetKind(tok::period);
				1055	}
				1056	break;
				1057	case '&':
				1058	Char = getCharAndSize(CurPtr, SizeTmp);
				1059	if (Char == '&') {
				1060	Result.SetKind(tok::ampamp);
				1061	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1062	} else if (Char == '=') {
				1063	Result.SetKind(tok::ampequal);
				1064	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1065	} else {
				1066	Result.SetKind(tok::amp);
				1067	}
				1068	break;
				1069	case '*':
				1070	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1071	Result.SetKind(tok::starequal);
				1072	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1073	} else {
				1074	Result.SetKind(tok::star);
				1075	}
				1076	break;
				1077	case '+':
				1078	Char = getCharAndSize(CurPtr, SizeTmp);
				1079	if (Char == '+') {
				1080	Result.SetKind(tok::plusplus);
				1081	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1082	} else if (Char == '=') {
				1083	Result.SetKind(tok::plusequal);
				1084	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1085	} else {
				1086	Result.SetKind(tok::plus);
				1087	}
				1088	break;
				1089	case '-':
				1090	Char = getCharAndSize(CurPtr, SizeTmp);
				1091	if (Char == '-') {
				1092	Result.SetKind(tok::minusminus);
				1093	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1094	} else if (Char == '>' && Features.CPlusPlus &&
				1095	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') {
				1096	Result.SetKind(tok::arrowstar); // C++ ->*
				1097	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1098	SizeTmp2, Result);
				1099	} else if (Char == '>') {
				1100	Result.SetKind(tok::arrow);
				1101	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1102	} else if (Char == '=') {
				1103	Result.SetKind(tok::minusequal);
				1104	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1105	} else {
				1106	Result.SetKind(tok::minus);
				1107	}
				1108	break;
				1109	case '~':
				1110	Result.SetKind(tok::tilde);
				1111	break;
				1112	case '!':
				1113	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1114	Result.SetKind(tok::exclaimequal);
				1115	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1116	} else {
				1117	Result.SetKind(tok::exclaim);
				1118	}
				1119	break;
				1120	case '/':
				1121	// 6.4.9: Comments
				1122	Char = getCharAndSize(CurPtr, SizeTmp);
				1123	if (Char == '/') { // BCPL comment.
				1124	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1125	SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1126	goto LexNextToken; // GCC isn't tail call eliminating.
				1127	} else if (Char == '') { // /*/ comment.
				1128	Result.SetFlag(LexerToken::LeadingSpace);
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1129	SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1130	goto LexNextToken; // GCC isn't tail call eliminating.
				1131	} else if (Char == '=') {
				1132	Result.SetKind(tok::slashequal);
				1133	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1134	} else {
				1135	Result.SetKind(tok::slash);
				1136	}
				1137	break;
				1138	case '%':
				1139	Char = getCharAndSize(CurPtr, SizeTmp);
				1140	if (Char == '=') {
				1141	Result.SetKind(tok::percentequal);
				1142	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1143	} else if (Features.Digraphs && Char == '>') {
				1144	Result.SetKind(tok::r_brace); // '%>' -> '}'
				1145	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1146	} else if (Features.Digraphs && Char == ':') {
				1147	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1148	if (getCharAndSize(CurPtr, SizeTmp) == '%' &&
				1149	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') {
				1150	Result.SetKind(tok::hashhash); // '%:%:' -> '##'
				1151	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1152	SizeTmp2, Result);
				1153	} else {
				1154	Result.SetKind(tok::hash); // '%:' -> '#'
				1155
				1156	// We parsed a # character. If this occurs at the start of the line,
				1157	// it's actually the start of a preprocessing directive. Callback to
				1158	// the preprocessor to handle it.
				1159	// FIXME: -fpreprocessed mode??
				1160	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1161	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1162	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1163
				1164	// As an optimization, if the preprocessor didn't switch lexers, tail
				1165	// recurse.
				1166	if (PP.isCurrentLexer(this)) {
				1167	// Start a new token. If this is a #include or something, the PP may
				1168	// want us starting at the beginning of the line again. If so, set
				1169	// the StartOfLine flag.
				1170	if (IsAtStartOfLine) {
				1171	Result.SetFlag(LexerToken::StartOfLine);
				1172	IsAtStartOfLine = false;
				1173	}
				1174	goto LexNextToken; // GCC isn't tail call eliminating.
				1175	}
				1176
				1177	return PP.Lex(Result);
				1178	}
				1179	}
				1180	} else {
				1181	Result.SetKind(tok::percent);
				1182	}
				1183	break;
				1184	case '<':
				1185	Char = getCharAndSize(CurPtr, SizeTmp);
				1186	if (ParsingFilename) {
				1187	return LexAngledStringLiteral(Result, CurPtr+SizeTmp);
				1188	} else if (Char == '<' &&
				1189	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1190	Result.SetKind(tok::lesslessequal);
				1191	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1192	SizeTmp2, Result);
				1193	} else if (Char == '<') {
				1194	Result.SetKind(tok::lessless);
				1195	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1196	} else if (Char == '=') {
				1197	Result.SetKind(tok::lessequal);
				1198	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1199	} else if (Features.Digraphs && Char == ':') {
				1200	Result.SetKind(tok::l_square); // '<:' -> '['
				1201	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1202	} else if (Features.Digraphs && Char == '>') {
				1203	Result.SetKind(tok::l_brace); // '<%' -> '{'
				1204	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1205	} else if (Features.CPPMinMax && Char == '?') { // <?
				1206	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1207	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1208
				1209	if (getCharAndSize(CurPtr, SizeTmp) == '=') { // <?=
				1210	Result.SetKind(tok::lessquestionequal);
				1211	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1212	} else {
				1213	Result.SetKind(tok::lessquestion);
				1214	}
				1215	} else {
				1216	Result.SetKind(tok::less);
				1217	}
				1218	break;
				1219	case '>':
				1220	Char = getCharAndSize(CurPtr, SizeTmp);
				1221	if (Char == '=') {
				1222	Result.SetKind(tok::greaterequal);
				1223	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1224	} else if (Char == '>' &&
				1225	getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
				1226	Result.SetKind(tok::greatergreaterequal);
				1227	CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
				1228	SizeTmp2, Result);
				1229	} else if (Char == '>') {
				1230	Result.SetKind(tok::greatergreater);
				1231	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1232	} else if (Features.CPPMinMax && Char == '?') {
				1233	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1234	Diag(BufferPtr, diag::min_max_deprecated);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1235
				1236	if (getCharAndSize(CurPtr, SizeTmp) == '=') {
				1237	Result.SetKind(tok::greaterquestionequal); // >?=
				1238	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1239	} else {
				1240	Result.SetKind(tok::greaterquestion); // >?
				1241	}
				1242	} else {
				1243	Result.SetKind(tok::greater);
				1244	}
				1245	break;
				1246	case '^':
				1247	Char = getCharAndSize(CurPtr, SizeTmp);
				1248	if (Char == '=') {
				1249	Result.SetKind(tok::caretequal);
				1250	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1251	} else {
				1252	Result.SetKind(tok::caret);
				1253	}
				1254	break;
				1255	case '\|':
				1256	Char = getCharAndSize(CurPtr, SizeTmp);
				1257	if (Char == '=') {
				1258	Result.SetKind(tok::pipeequal);
				1259	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1260	} else if (Char == '\|') {
				1261	Result.SetKind(tok::pipepipe);
				1262	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1263	} else {
				1264	Result.SetKind(tok::pipe);
				1265	}
				1266	break;
				1267	case ':':
				1268	Char = getCharAndSize(CurPtr, SizeTmp);
				1269	if (Features.Digraphs && Char == '>') {
				1270	Result.SetKind(tok::r_square); // ':>' -> ']'
				1271	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1272	} else if (Features.CPlusPlus && Char == ':') {
				1273	Result.SetKind(tok::coloncolon);
				1274	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1275	} else {
				1276	Result.SetKind(tok::colon);
				1277	}
				1278	break;
				1279	case ';':
				1280	Result.SetKind(tok::semi);
				1281	break;
				1282	case '=':
				1283	Char = getCharAndSize(CurPtr, SizeTmp);
				1284	if (Char == '=') {
				1285	Result.SetKind(tok::equalequal);
				1286	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1287	} else {
				1288	Result.SetKind(tok::equal);
				1289	}
				1290	break;
				1291	case ',':
				1292	Result.SetKind(tok::comma);
				1293	break;
				1294	case '#':
				1295	Char = getCharAndSize(CurPtr, SizeTmp);
				1296	if (Char == '#') {
				1297	Result.SetKind(tok::hashhash);
				1298	CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
				1299	} else {
				1300	Result.SetKind(tok::hash);
				1301	// We parsed a # character. If this occurs at the start of the line,
				1302	// it's actually the start of a preprocessing directive. Callback to
				1303	// the preprocessor to handle it.
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	1304	// FIXME: -fpreprocessed mode??
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1305	if (Result.isAtStartOfLine() && !PP.isSkipping()) {
				1306	BufferPtr = CurPtr;
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1307	PP.HandleDirective(Result);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1308
				1309	// As an optimization, if the preprocessor didn't switch lexers, tail
				1310	// recurse.
				1311	if (PP.isCurrentLexer(this)) {
				1312	// Start a new token. If this is a #include or something, the PP may
				1313	// want us starting at the beginning of the line again. If so, set
				1314	// the StartOfLine flag.
				1315	if (IsAtStartOfLine) {
				1316	Result.SetFlag(LexerToken::StartOfLine);
				1317	IsAtStartOfLine = false;
				1318	}
				1319	goto LexNextToken; // GCC isn't tail call eliminating.
				1320	}
				1321	return PP.Lex(Result);
				1322	}
				1323	}
				1324	break;
				1325
				1326	case '\\':
Chris Lattner	505c547	2006-07-03 00:55:48 +0000	[diff] [blame]	1327	// FIXME: UCN's.
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1328	// FALL THROUGH.
				1329	default:
				1330	// Objective C support.
				1331	if (CurPtr[-1] == '@' && Features.ObjC1) {
				1332	Result.SetKind(tok::at);
				1333	break;
				1334	} else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1335	Diag(CurPtr-1, diag::ext_dollar_in_identifier);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1336	return LexIdentifier(Result, CurPtr);
				1337	}
				1338
Chris Lattner	cb28334	2006-06-18 06:48:37 +0000	[diff] [blame]	1339	if (!PP.isSkipping()) Diag(CurPtr-1, diag::err_stray_character);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1340	BufferPtr = CurPtr;
				1341	goto LexNextToken; // GCC isn't tail call eliminating.
				1342	}
				1343
Chris Lattner	d01e291	2006-06-18 16:22:51 +0000	[diff] [blame]	1344	// Update the location of token as well as BufferPtr.
				1345	FormTokenWithChars(Result, CurPtr);
Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame]	1346	}