Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: c589219f06ffafea6467f851100603f556320c63 [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	2	#include "clang/AST/CommentCommandTraits.h"
Fariborz Jahanian	efa78d1	2013-05-04 00:47:28 +0000	[diff] [blame]	3	#include "clang/AST/CommentDiagnostic.h"
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	4	#include "clang/Basic/CharInfo.h"
Dmitri Gribenko	c934dfe	2013-01-19 22:06:05 +0000	[diff] [blame]	5	#include "llvm/ADT/StringExtras.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	6	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	7	#include "llvm/Support/ConvertUTF.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	8	#include "llvm/Support/ErrorHandling.h"
				9
				10	namespace clang {
				11	namespace comments {
				12
				13	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				14	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				15	Loc.dump(SM);
				16	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				17	}
				18
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	19	static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	20	return isLetter(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	21	}
				22
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	23	static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	24	return isDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	25	}
				26
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	27	static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	28	return isHexDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	29	}
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	30
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	31	static inline StringRef convertCodePointToUTF8(
				32	llvm::BumpPtrAllocator &Allocator,
				33	unsigned CodePoint) {
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	34	char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
				35	char *ResolvedPtr = Resolved;
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	36	if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	37	return StringRef(Resolved, ResolvedPtr - Resolved);
				38	else
				39	return StringRef();
				40	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	41
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	42	namespace {
				43
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	44	#include "clang/AST/CommentHTMLTags.inc"
				45	#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
				46
				47	} // unnamed namespace
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	48
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	49	StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	50	// Fast path, first check a few most widely used named character references.
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	51	return llvm::StringSwitch<StringRef>(Name)
				52	.Case("amp", "&")
				53	.Case("lt", "<")
				54	.Case("gt", ">")
				55	.Case("quot", "\"")
				56	.Case("apos", "\'")
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	57	// Slow path.
				58	.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	59	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	60
				61	StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
				62	unsigned CodePoint = 0;
				63	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				64	assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
				65	CodePoint *= 10;
				66	CodePoint += Name[i] - '0';
				67	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	68	return convertCodePointToUTF8(Allocator, CodePoint);
				69	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	70
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	71	StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
				72	unsigned CodePoint = 0;
				73	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				74	CodePoint *= 16;
				75	const char C = Name[i];
				76	assert(isHTMLHexCharacterReferenceCharacter(C));
				77	CodePoint += llvm::hexDigitValue(C);
				78	}
				79	return convertCodePointToUTF8(Allocator, CodePoint);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	80	}
				81
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	82	void Lexer::skipLineStartingDecorations() {
				83	// This function should be called only for C comments
				84	assert(CommentState == LCS_InsideCComment);
				85
				86	if (BufferPtr == CommentEnd)
				87	return;
				88
				89	switch (*BufferPtr) {
				90	case ' ':
				91	case '\t':
				92	case '\f':
				93	case '\v': {
				94	const char *NewBufferPtr = BufferPtr;
				95	NewBufferPtr++;
				96	if (NewBufferPtr == CommentEnd)
				97	return;
				98
				99	char C = *NewBufferPtr;
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	100	while (isHorizontalWhitespace(C)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	101	NewBufferPtr++;
				102	if (NewBufferPtr == CommentEnd)
				103	return;
				104	C = *NewBufferPtr;
				105	}
				106	if (C == '*')
				107	BufferPtr = NewBufferPtr + 1;
				108	break;
				109	}
				110	case '*':
				111	BufferPtr++;
				112	break;
				113	}
				114	}
				115
				116	namespace {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	117	/// Returns pointer to the first newline character in the string.
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	118	const char findNewline(const char BufferPtr, const char *BufferEnd) {
				119	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	120	if (isVerticalWhitespace(*BufferPtr))
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	121	return BufferPtr;
				122	}
				123	return BufferEnd;
				124	}
				125
				126	const char skipNewline(const char BufferPtr, const char *BufferEnd) {
				127	if (BufferPtr == BufferEnd)
				128	return BufferPtr;
				129
				130	if (*BufferPtr == '\n')
				131	BufferPtr++;
				132	else {
				133	assert(*BufferPtr == '\r');
				134	BufferPtr++;
				135	if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				136	BufferPtr++;
				137	}
				138	return BufferPtr;
				139	}
				140
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	141	const char skipNamedCharacterReference(const char BufferPtr,
				142	const char *BufferEnd) {
				143	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				144	if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
				145	return BufferPtr;
				146	}
				147	return BufferEnd;
				148	}
				149
				150	const char skipDecimalCharacterReference(const char BufferPtr,
				151	const char *BufferEnd) {
				152	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				153	if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
				154	return BufferPtr;
				155	}
				156	return BufferEnd;
				157	}
				158
				159	const char skipHexCharacterReference(const char BufferPtr,
				160	const char *BufferEnd) {
				161	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				162	if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
				163	return BufferPtr;
				164	}
				165	return BufferEnd;
				166	}
				167
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	168	bool isHTMLIdentifierStartingCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	169	return isLetter(C);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	170	}
				171
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	172	bool isHTMLIdentifierCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	173	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	174	}
				175
				176	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				177	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				178	if (!isHTMLIdentifierCharacter(*BufferPtr))
				179	return BufferPtr;
				180	}
				181	return BufferEnd;
				182	}
				183
				184	/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
				185	/// string allowed.
				186	///
				187	/// Returns pointer to closing quote.
				188	const char skipHTMLQuotedString(const char BufferPtr, const char *BufferEnd)
				189	{
				190	const char Quote = *BufferPtr;
				191	assert(Quote == '\"' \|\| Quote == '\'');
				192
				193	BufferPtr++;
				194	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				195	const char C = *BufferPtr;
				196	if (C == Quote && BufferPtr[-1] != '\\')
				197	return BufferPtr;
				198	}
				199	return BufferEnd;
				200	}
				201
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	202	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				203	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				204	if (!isWhitespace(*BufferPtr))
				205	return BufferPtr;
				206	}
				207	return BufferEnd;
				208	}
				209
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	210	bool isWhitespace(const char BufferPtr, const char BufferEnd) {
				211	return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
				212	}
				213
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	214	bool isCommandNameStartCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	215	return isLetter(C);
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	216	}
				217
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	218	bool isCommandNameCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	219	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	220	}
				221
				222	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				223	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				224	if (!isCommandNameCharacter(*BufferPtr))
				225	return BufferPtr;
				226	}
				227	return BufferEnd;
				228	}
				229
				230	/// Return the one past end pointer for BCPL comments.
				231	/// Handles newlines escaped with backslash or trigraph for backslahs.
				232	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				233	const char *CurPtr = BufferPtr;
				234	while (CurPtr != BufferEnd) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	235	while (!isVerticalWhitespace(*CurPtr)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	236	CurPtr++;
				237	if (CurPtr == BufferEnd)
				238	return BufferEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	239	}
				240	// We found a newline, check if it is escaped.
				241	const char *EscapePtr = CurPtr - 1;
				242	while(isHorizontalWhitespace(*EscapePtr))
				243	EscapePtr--;
				244
				245	if (*EscapePtr == '\\' \|\|
				246	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				247	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				248	// We found an escaped newline.
				249	CurPtr = skipNewline(CurPtr, BufferEnd);
				250	} else
				251	return CurPtr; // Not an escaped newline.
				252	}
				253	return BufferEnd;
				254	}
				255
				256	/// Return the one past end pointer for C comments.
				257	/// Very dumb, does not handle escaped newlines or trigraphs.
				258	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				259	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				260	if (BufferPtr == '') {
				261	assert(BufferPtr + 1 != BufferEnd);
				262	if (*(BufferPtr + 1) == '/')
				263	return BufferPtr;
				264	}
				265	}
				266	llvm_unreachable("buffer end hit before '*/' was seen");
				267	}
Fariborz Jahanian	0089bc4	2013-05-08 19:21:00 +0000	[diff] [blame]	268
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	269	} // unnamed namespace
				270
				271	void Lexer::lexCommentText(Token &T) {
				272	assert(CommentState == LCS_InsideBCPLComment \|\|
				273	CommentState == LCS_InsideCComment);
				274
				275	switch (State) {
				276	case LS_Normal:
				277	break;
				278	case LS_VerbatimBlockFirstLine:
				279	lexVerbatimBlockFirstLine(T);
				280	return;
				281	case LS_VerbatimBlockBody:
				282	lexVerbatimBlockBody(T);
				283	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	284	case LS_VerbatimLineText:
				285	lexVerbatimLineText(T);
				286	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	287	case LS_HTMLStartTag:
				288	lexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	289	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	290	case LS_HTMLEndTag:
				291	lexHTMLEndTag(T);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	292	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	293	}
				294
				295	assert(State == LS_Normal);
				296
				297	const char *TokenPtr = BufferPtr;
				298	assert(TokenPtr < CommentEnd);
				299	while (TokenPtr != CommentEnd) {
				300	switch(*TokenPtr) {
				301	case '\\':
				302	case '@': {
Dmitri Gribenko	808383d	2013-03-04 23:06:15 +0000	[diff] [blame]	303	// Commands that start with a backslash and commands that start with
				304	// 'at' have equivalent semantics. But we keep information about the
				305	// exact syntax in AST for comments.
				306	tok::TokenKind CommandKind =
				307	(*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	308	TokenPtr++;
				309	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	310	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	311	return;
				312	}
				313	char C = *TokenPtr;
				314	switch (C) {
				315	default:
				316	break;
				317
				318	case '\\': case '@': case '&': case '$':
				319	case '#': case '<': case '>': case '%':
				320	case '\"': case '.': case ':':
				321	// This is one of \\ \@ \& \$ etc escape sequences.
				322	TokenPtr++;
				323	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				324	// This is the \:: escape sequence.
				325	TokenPtr++;
				326	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	327	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	328	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	329	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	330	return;
				331	}
				332
				333	// Don't make zero-length commands.
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	334	if (!isCommandNameStartCharacter(*TokenPtr)) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	335	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	336	return;
				337	}
				338
				339	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				340	unsigned Length = TokenPtr - (BufferPtr + 1);
				341
				342	// Hardcoded support for lexing LaTeX formula commands
				343	// \f$ \f[ \f] \f{ \f} as a single command.
				344	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				345	C = *TokenPtr;
				346	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				347	TokenPtr++;
				348	Length++;
				349	}
				350	}
				351
				352	const StringRef CommandName(BufferPtr + 1, Length);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	353
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	354	const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
				355	if (!Info) {
				356	formTokenWithChars(T, TokenPtr, tok::unknown_command);
				357	T.setUnknownCommandName(CommandName);
Fariborz Jahanian	5cd4c41	2013-05-09 16:44:02 +0000	[diff] [blame^]	358	// single character command impostures, such as \t or \n must not go
				359	// through the fixit logic.
				360	if (CommandName.size() <= 1)
				361	return;
Fariborz Jahanian	abbfa67	2013-05-08 20:29:57 +0000	[diff] [blame]	362	if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
Fariborz Jahanian	0089bc4	2013-05-08 19:21:00 +0000	[diff] [blame]	363	StringRef CorrectedName = Info->Name;
				364	SourceRange CommandRange(T.getLocation().getLocWithOffset(1),
				365	T.getEndLocation());
				366	Diag(T.getLocation(), diag::warn_correct_comment_command_name)
				367	<< CommandName << CorrectedName
				368	<< FixItHint::CreateReplacement(CommandRange, CorrectedName);
				369	} else {
				370	Diag(T.getLocation(), diag::warn_unknown_comment_command_name);
				371	return;
				372	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	373	}
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	374	if (Info->IsVerbatimBlockCommand) {
				375	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
				376	return;
				377	}
				378	if (Info->IsVerbatimLineCommand) {
				379	setupAndLexVerbatimLine(T, TokenPtr, Info);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	380	return;
				381	}
Dmitri Gribenko	808383d	2013-03-04 23:06:15 +0000	[diff] [blame]	382	formTokenWithChars(T, TokenPtr, CommandKind);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	383	T.setCommandID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	384	return;
				385	}
				386
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	387	case '&':
				388	lexHTMLCharacterReference(T);
				389	return;
				390
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	391	case '<': {
				392	TokenPtr++;
				393	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	394	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	395	return;
				396	}
				397	const char C = *TokenPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	398	if (isHTMLIdentifierStartingCharacter(C))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	399	setupAndLexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	400	else if (C == '/')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	401	setupAndLexHTMLEndTag(T);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	402	else
				403	formTextToken(T, TokenPtr);
				404
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	405	return;
				406	}
				407
				408	case '\n':
				409	case '\r':
				410	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				411	formTokenWithChars(T, TokenPtr, tok::newline);
				412
				413	if (CommentState == LCS_InsideCComment)
				414	skipLineStartingDecorations();
				415	return;
				416
				417	default: {
Dmitri Gribenko	aa7dbaf	2012-12-30 19:45:46 +0000	[diff] [blame]	418	size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
				419	find_first_of("\n\r\\@&<");
				420	if (End != StringRef::npos)
				421	TokenPtr += End;
				422	else
				423	TokenPtr = CommentEnd;
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	424	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	425	return;
				426	}
				427	}
				428	}
				429	}
				430
				431	void Lexer::setupAndLexVerbatimBlock(Token &T,
				432	const char *TextBegin,
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	433	char Marker, const CommandInfo *Info) {
				434	assert(Info->IsVerbatimBlockCommand);
				435
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	436	VerbatimBlockEndCommandName.clear();
				437	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	438	VerbatimBlockEndCommandName.append(Info->EndCommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	439
				440	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	441	T.setVerbatimBlockID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	442
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	443	// If there is a newline following the verbatim opening command, skip the
				444	// newline so that we don't create an tok::verbatim_block_line with empty
				445	// text content.
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	446	if (BufferPtr != CommentEnd &&
				447	isVerticalWhitespace(*BufferPtr)) {
				448	BufferPtr = skipNewline(BufferPtr, CommentEnd);
				449	State = LS_VerbatimBlockBody;
				450	return;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	451	}
				452
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	453	State = LS_VerbatimBlockFirstLine;
				454	}
				455
				456	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	457	again:
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	458	assert(BufferPtr < CommentEnd);
				459
				460	// FIXME: It would be better to scan the text once, finding either the block
				461	// end command or newline.
				462	//
				463	// Extract current line.
				464	const char *Newline = findNewline(BufferPtr, CommentEnd);
				465	StringRef Line(BufferPtr, Newline - BufferPtr);
				466
				467	// Look for end command in current line.
				468	size_t Pos = Line.find(VerbatimBlockEndCommandName);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	469	const char *TextEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	470	const char *NextLine;
				471	if (Pos == StringRef::npos) {
				472	// Current line is completely verbatim.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	473	TextEnd = Newline;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	474	NextLine = skipNewline(Newline, CommentEnd);
				475	} else if (Pos == 0) {
				476	// Current line contains just an end command.
				477	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	478	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	479	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	480	T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	481	State = LS_Normal;
				482	return;
				483	} else {
				484	// There is some text, followed by end command. Extract text first.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	485	TextEnd = BufferPtr + Pos;
				486	NextLine = TextEnd;
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	487	// If there is only whitespace before end command, skip whitespace.
				488	if (isWhitespace(BufferPtr, TextEnd)) {
				489	BufferPtr = TextEnd;
				490	goto again;
				491	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	492	}
				493
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	494	StringRef Text(BufferPtr, TextEnd - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	495	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	496	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	497
				498	State = LS_VerbatimBlockBody;
				499	}
				500
				501	void Lexer::lexVerbatimBlockBody(Token &T) {
				502	assert(State == LS_VerbatimBlockBody);
				503
				504	if (CommentState == LCS_InsideCComment)
				505	skipLineStartingDecorations();
				506
				507	lexVerbatimBlockFirstLine(T);
				508	}
				509
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	510	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
				511	const CommandInfo *Info) {
				512	assert(Info->IsVerbatimLineCommand);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	513	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	514	T.setVerbatimLineID(Info->getID());
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	515
				516	State = LS_VerbatimLineText;
				517	}
				518
				519	void Lexer::lexVerbatimLineText(Token &T) {
				520	assert(State == LS_VerbatimLineText);
				521
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	522	// Extract current line.
				523	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	524	const StringRef Text(BufferPtr, Newline - BufferPtr);
				525	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	526	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	527
				528	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	529	}
				530
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	531	void Lexer::lexHTMLCharacterReference(Token &T) {
				532	const char *TokenPtr = BufferPtr;
				533	assert(*TokenPtr == '&');
				534	TokenPtr++;
				535	if (TokenPtr == CommentEnd) {
				536	formTextToken(T, TokenPtr);
				537	return;
				538	}
				539	const char *NamePtr;
				540	bool isNamed = false;
				541	bool isDecimal = false;
				542	char C = *TokenPtr;
				543	if (isHTMLNamedCharacterReferenceCharacter(C)) {
				544	NamePtr = TokenPtr;
				545	TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
				546	isNamed = true;
				547	} else if (C == '#') {
				548	TokenPtr++;
				549	if (TokenPtr == CommentEnd) {
				550	formTextToken(T, TokenPtr);
				551	return;
				552	}
				553	C = *TokenPtr;
				554	if (isHTMLDecimalCharacterReferenceCharacter(C)) {
				555	NamePtr = TokenPtr;
				556	TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
				557	isDecimal = true;
				558	} else if (C == 'x' \|\| C == 'X') {
				559	TokenPtr++;
				560	NamePtr = TokenPtr;
				561	TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
				562	} else {
				563	formTextToken(T, TokenPtr);
				564	return;
				565	}
				566	} else {
				567	formTextToken(T, TokenPtr);
				568	return;
				569	}
				570	if (NamePtr == TokenPtr \|\| TokenPtr == CommentEnd \|\|
				571	*TokenPtr != ';') {
				572	formTextToken(T, TokenPtr);
				573	return;
				574	}
				575	StringRef Name(NamePtr, TokenPtr - NamePtr);
				576	TokenPtr++; // Skip semicolon.
				577	StringRef Resolved;
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	578	if (isNamed)
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	579	Resolved = resolveHTMLNamedCharacterReference(Name);
				580	else if (isDecimal)
				581	Resolved = resolveHTMLDecimalCharacterReference(Name);
				582	else
				583	Resolved = resolveHTMLHexCharacterReference(Name);
				584
				585	if (Resolved.empty()) {
				586	formTextToken(T, TokenPtr);
				587	return;
				588	}
				589	formTokenWithChars(T, TokenPtr, tok::text);
				590	T.setText(Resolved);
				591	return;
				592	}
				593
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	594	void Lexer::setupAndLexHTMLStartTag(Token &T) {
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	595	assert(BufferPtr[0] == '<' &&
				596	isHTMLIdentifierStartingCharacter(BufferPtr[1]));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	597	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	598	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	599	if (!isHTMLTagName(Name)) {
				600	formTextToken(T, TagNameEnd);
				601	return;
				602	}
				603
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	604	formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
				605	T.setHTMLTagStartName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	606
				607	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				608
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	609	const char C = *BufferPtr;
				610	if (BufferPtr != CommentEnd &&
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	611	(C == '>' \|\| C == '/' \|\| isHTMLIdentifierStartingCharacter(C)))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	612	State = LS_HTMLStartTag;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	613	}
				614
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	615	void Lexer::lexHTMLStartTag(Token &T) {
				616	assert(State == LS_HTMLStartTag);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	617
				618	const char *TokenPtr = BufferPtr;
				619	char C = *TokenPtr;
				620	if (isHTMLIdentifierCharacter(C)) {
				621	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	622	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	623	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	624	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	625	} else {
				626	switch (C) {
				627	case '=':
				628	TokenPtr++;
				629	formTokenWithChars(T, TokenPtr, tok::html_equals);
				630	break;
				631	case '\"':
				632	case '\'': {
				633	const char *OpenQuote = TokenPtr;
				634	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				635	const char *ClosingQuote = TokenPtr;
				636	if (TokenPtr != CommentEnd) // Skip closing quote.
				637	TokenPtr++;
				638	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				639	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				640	ClosingQuote - (OpenQuote + 1)));
				641	break;
				642	}
				643	case '>':
				644	TokenPtr++;
				645	formTokenWithChars(T, TokenPtr, tok::html_greater);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	646	State = LS_Normal;
				647	return;
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	648	case '/':
				649	TokenPtr++;
				650	if (TokenPtr != CommentEnd && *TokenPtr == '>') {
				651	TokenPtr++;
				652	formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	653	} else
				654	formTextToken(T, TokenPtr);
				655
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	656	State = LS_Normal;
				657	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	658	}
				659	}
				660
				661	// Now look ahead and return to normal state if we don't see any HTML tokens
				662	// ahead.
				663	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				664	if (BufferPtr == CommentEnd) {
				665	State = LS_Normal;
				666	return;
				667	}
				668
				669	C = *BufferPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	670	if (!isHTMLIdentifierStartingCharacter(C) &&
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	671	C != '=' && C != '\"' && C != '\'' && C != '>') {
				672	State = LS_Normal;
				673	return;
				674	}
				675	}
				676
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	677	void Lexer::setupAndLexHTMLEndTag(Token &T) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	678	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				679
				680	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				681	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	682	StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
				683	if (!isHTMLTagName(Name)) {
				684	formTextToken(T, TagNameEnd);
				685	return;
				686	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	687
				688	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	689
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	690	formTokenWithChars(T, End, tok::html_end_tag);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	691	T.setHTMLTagEndName(Name);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	692
				693	if (BufferPtr != CommentEnd && *BufferPtr == '>')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	694	State = LS_HTMLEndTag;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	695	}
				696
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	697	void Lexer::lexHTMLEndTag(Token &T) {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	698	assert(BufferPtr != CommentEnd && *BufferPtr == '>');
				699
				700	formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
				701	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	702	}
				703
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame]	704	Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
				705	const CommandTraits &Traits,
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	706	SourceLocation FileLoc,
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	707	const char BufferStart, const char BufferEnd):
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame]	708	Allocator(Allocator), Diags(Diags), Traits(Traits),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	709	BufferStart(BufferStart), BufferEnd(BufferEnd),
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	710	FileLoc(FileLoc), BufferPtr(BufferStart),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	711	CommentState(LCS_BeforeComment), State(LS_Normal) {
				712	}
				713
				714	void Lexer::lex(Token &T) {
				715	again:
				716	switch (CommentState) {
				717	case LCS_BeforeComment:
				718	if (BufferPtr == BufferEnd) {
				719	formTokenWithChars(T, BufferPtr, tok::eof);
				720	return;
				721	}
				722
				723	assert(*BufferPtr == '/');
				724	BufferPtr++; // Skip first slash.
				725	switch(*BufferPtr) {
				726	case '/': { // BCPL comment.
				727	BufferPtr++; // Skip second slash.
				728
				729	if (BufferPtr != BufferEnd) {
				730	// Skip Doxygen magic marker, if it is present.
				731	// It might be missing because of a typo //< or /*<, or because we
				732	// merged this non-Doxygen comment into a bunch of Doxygen comments
				733	// around it: /** ... / / ... / /* ... */
				734	const char C = *BufferPtr;
				735	if (C == '/' \|\| C == '!')
				736	BufferPtr++;
				737	}
				738
				739	// Skip less-than symbol that marks trailing comments.
				740	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				741	// are frequent typos.
				742	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				743	BufferPtr++;
				744
				745	CommentState = LCS_InsideBCPLComment;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	746	if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
				747	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	748	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				749	goto again;
				750	}
				751	case '*': { // C comment.
				752	BufferPtr++; // Skip star.
				753
				754	// Skip Doxygen magic marker.
				755	const char C = *BufferPtr;
				756	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				757	BufferPtr++;
				758
				759	// Skip less-than symbol that marks trailing comments.
				760	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				761	BufferPtr++;
				762
				763	CommentState = LCS_InsideCComment;
				764	State = LS_Normal;
				765	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				766	goto again;
				767	}
				768	default:
				769	llvm_unreachable("second character of comment should be '/' or '*'");
				770	}
				771
				772	case LCS_BetweenComments: {
				773	// Consecutive comments are extracted only if there is only whitespace
				774	// between them. So we can search for the start of the next comment.
				775	const char *EndWhitespace = BufferPtr;
				776	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				777	EndWhitespace++;
				778
				779	// Turn any whitespace between comments (and there is only whitespace
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	780	// between them -- guaranteed by comment extraction) into a newline. We
				781	// have two newlines between C comments in total (first one was synthesized
				782	// after a comment).
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	783	formTokenWithChars(T, EndWhitespace, tok::newline);
				784
				785	CommentState = LCS_BeforeComment;
				786	break;
				787	}
				788
				789	case LCS_InsideBCPLComment:
				790	case LCS_InsideCComment:
				791	if (BufferPtr != CommentEnd) {
				792	lexCommentText(T);
				793	break;
				794	} else {
				795	// Skip C comment closing sequence.
				796	if (CommentState == LCS_InsideCComment) {
				797	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				798	BufferPtr += 2;
				799	assert(BufferPtr <= BufferEnd);
				800
				801	// Synthenize newline just after the C comment, regardless if there is
				802	// actually a newline.
				803	formTokenWithChars(T, BufferPtr, tok::newline);
				804
				805	CommentState = LCS_BetweenComments;
				806	break;
				807	} else {
				808	// Don't synthesized a newline after BCPL comment.
				809	CommentState = LCS_BetweenComments;
				810	goto again;
				811	}
				812	}
				813	}
				814	}
				815
				816	StringRef Lexer::getSpelling(const Token &Tok,
				817	const SourceManager &SourceMgr,
				818	bool *Invalid) const {
				819	SourceLocation Loc = Tok.getLocation();
				820	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				821
				822	bool InvalidTemp = false;
				823	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				824	if (InvalidTemp) {
				825	*Invalid = true;
				826	return StringRef();
				827	}
				828
				829	const char *Begin = File.data() + LocInfo.second;
				830	return StringRef(Begin, Tok.getLength());
				831	}
				832
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	833	} // end namespace comments
				834	} // end namespace clang
				835