Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: c3a801d924c24b944f129b9c7b7752f11a6a9881 [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
				2	#include "llvm/ADT/StringSwitch.h"
				3	#include "llvm/Support/ErrorHandling.h"
				4
				5	namespace clang {
				6	namespace comments {
				7
				8	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				9	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				10	Loc.dump(SM);
				11	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				12	}
				13
				14	bool Lexer::isVerbatimBlockCommand(StringRef BeginName,
				15	StringRef &EndName) const {
				16	const char Result = llvm::StringSwitch<const char >(BeginName)
				17	.Case("code", "endcode")
				18	.Case("verbatim", "endverbatim")
				19	.Case("htmlonly", "endhtmlonly")
				20	.Case("latexonly", "endlatexonly")
				21	.Case("xmlonly", "endxmlonly")
				22	.Case("manonly", "endmanonly")
				23	.Case("rtfonly", "endrtfonly")
				24
				25	.Case("dot", "enddot")
				26	.Case("msc", "endmsc")
				27
				28	.Case("f$", "f$") // Inline LaTeX formula
				29	.Case("f[", "f]") // Displayed LaTeX formula
				30	.Case("f{", "f}") // LaTeX environment
				31
				32	.Default(NULL);
				33
				34	if (Result) {
				35	EndName = Result;
				36	return true;
				37	}
				38
				39	for (VerbatimBlockCommandVector::const_iterator
				40	I = VerbatimBlockCommands.begin(),
				41	E = VerbatimBlockCommands.end();
				42	I != E; ++I)
				43	if (I->BeginName == BeginName) {
				44	EndName = I->EndName;
				45	return true;
				46	}
				47
				48	return false;
				49	}
				50
				51	bool Lexer::isVerbatimLineCommand(StringRef Name) const {
				52	bool Result = llvm::StringSwitch<bool>(Name)
				53	.Case("fn", true)
				54	.Case("var", true)
				55	.Case("property", true)
				56	.Case("typedef", true)
				57
				58	.Case("overload", true)
				59
				60	.Case("defgroup", true)
				61	.Case("ingroup", true)
				62	.Case("addtogroup", true)
				63	.Case("weakgroup", true)
				64	.Case("name", true)
				65
				66	.Case("section", true)
				67	.Case("subsection", true)
				68	.Case("subsubsection", true)
				69	.Case("paragraph", true)
				70
				71	.Case("mainpage", true)
				72	.Case("subpage", true)
				73	.Case("ref", true)
				74
				75	.Default(false);
				76
				77	if (Result)
				78	return true;
				79
				80	for (VerbatimLineCommandVector::const_iterator
				81	I = VerbatimLineCommands.begin(),
				82	E = VerbatimLineCommands.end();
				83	I != E; ++I)
				84	if (I->Name == Name)
				85	return true;
				86
				87	return false;
				88	}
				89
				90	void Lexer::skipLineStartingDecorations() {
				91	// This function should be called only for C comments
				92	assert(CommentState == LCS_InsideCComment);
				93
				94	if (BufferPtr == CommentEnd)
				95	return;
				96
				97	switch (*BufferPtr) {
				98	case ' ':
				99	case '\t':
				100	case '\f':
				101	case '\v': {
				102	const char *NewBufferPtr = BufferPtr;
				103	NewBufferPtr++;
				104	if (NewBufferPtr == CommentEnd)
				105	return;
				106
				107	char C = *NewBufferPtr;
				108	while (C == ' ' \|\| C == '\t' \|\| C == '\f' \|\| C == '\v') {
				109	NewBufferPtr++;
				110	if (NewBufferPtr == CommentEnd)
				111	return;
				112	C = *NewBufferPtr;
				113	}
				114	if (C == '*')
				115	BufferPtr = NewBufferPtr + 1;
				116	break;
				117	}
				118	case '*':
				119	BufferPtr++;
				120	break;
				121	}
				122	}
				123
				124	namespace {
				/// Find the first newline character in [BufferPtr, BufferEnd).
				///
				/// \returns a pointer to the first '\n' or '\r', or \p BufferEnd if the
				/// range contains neither.
				const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
				  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				    const char C = *BufferPtr;
				    if (C == '\n' || C == '\r')
				      return BufferPtr;
				  }
				  return BufferEnd;
				}
				133
				/// Skip one newline sequence starting at \p BufferPtr.
				///
				/// Recognized sequences are "\n", "\r" and "\r\n". \p BufferPtr must either
				/// equal \p BufferEnd (in which case it is returned unchanged) or point at
				/// '\n' or '\r'.
				///
				/// \returns a pointer just past the newline sequence.
				const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
				  if (BufferPtr == BufferEnd)
				    return BufferPtr;

				  if (*BufferPtr == '\n')
				    BufferPtr++;
				  else {
				    assert(*BufferPtr == '\r');
				    BufferPtr++;
				    // Treat "\r\n" as a single newline.
				    if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				      BufferPtr++;
				  }
				  return BufferPtr;
				}
				148
				/// \returns true if \p C is an ASCII letter or decimal digit, i.e. a
				/// character accepted inside an HTML identifier by this lexer.
				bool isHTMLIdentifierCharacter(char C) {
				  return (C >= 'a' && C <= 'z') ||
				         (C >= 'A' && C <= 'Z') ||
				         (C >= '0' && C <= '9');
				}
				154
				155	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				156	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				157	if (!isHTMLIdentifierCharacter(*BufferPtr))
				158	return BufferPtr;
				159	}
				160	return BufferEnd;
				161	}
				162
				/// Skip an HTML string quoted in single or double quotes. A quote character
				/// preceded by a backslash does not terminate the string.
				///
				/// \p BufferPtr must point at the opening quote.
				///
				/// \returns a pointer to the closing quote, or \p BufferEnd if the string is
				/// not terminated.
				const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
				{
				  const char Quote = *BufferPtr;
				  assert(Quote == '\"' || Quote == '\'');

				  BufferPtr++;
				  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				    const char C = *BufferPtr;
				    // Only the same kind of quote closes the string, and only when it is
				    // not escaped by an immediately preceding backslash.
				    if (C == Quote && BufferPtr[-1] != '\\')
				      return BufferPtr;
				  }
				  return BufferEnd;
				}
				180
				/// \returns true if \p C is a space, horizontal tab, form feed or vertical
				/// tab (whitespace that does not end a line).
				bool isHorizontalWhitespace(char C) {
				  return C == ' ' || C == '\t' || C == '\f' || C == '\v';
				}
				184
				/// \returns true if \p C is a space, '\n', '\r', horizontal tab, form feed
				/// or vertical tab.
				bool isWhitespace(char C) {
				  return C == ' ' || C == '\n' || C == '\r' ||
				         C == '\t' || C == '\f' || C == '\v';
				}
				189
				190	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				191	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				192	if (!isWhitespace(*BufferPtr))
				193	return BufferPtr;
				194	}
				195	return BufferEnd;
				196	}
				197
				/// \returns true if \p C is an ASCII letter or decimal digit, i.e. a
				/// character accepted inside a command name by this lexer.
				bool isCommandNameCharacter(char C) {
				  return (C >= 'a' && C <= 'z') ||
				         (C >= 'A' && C <= 'Z') ||
				         (C >= '0' && C <= '9');
				}
				203
				204	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				205	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				206	if (!isCommandNameCharacter(*BufferPtr))
				207	return BufferPtr;
				208	}
				209	return BufferEnd;
				210	}
				211
				212	/// Return the one past end pointer for BCPL comments.
				213	/// Handles newlines escaped with backslash or trigraph for backslahs.
				214	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				215	const char *CurPtr = BufferPtr;
				216	while (CurPtr != BufferEnd) {
				217	char C = *CurPtr;
				218	while (C != '\n' && C != '\r') {
				219	CurPtr++;
				220	if (CurPtr == BufferEnd)
				221	return BufferEnd;
				222	C = *CurPtr;
				223	}
				224	// We found a newline, check if it is escaped.
				225	const char *EscapePtr = CurPtr - 1;
				226	while(isHorizontalWhitespace(*EscapePtr))
				227	EscapePtr--;
				228
				229	if (*EscapePtr == '\\' \|\|
				230	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				231	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				232	// We found an escaped newline.
				233	CurPtr = skipNewline(CurPtr, BufferEnd);
				234	} else
				235	return CurPtr; // Not an escaped newline.
				236	}
				237	return BufferEnd;
				238	}
				239
				240	/// Return the one past end pointer for C comments.
				241	/// Very dumb, does not handle escaped newlines or trigraphs.
				242	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				243	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				244	if (BufferPtr == '') {
				245	assert(BufferPtr + 1 != BufferEnd);
				246	if (*(BufferPtr + 1) == '/')
				247	return BufferPtr;
				248	}
				249	}
				250	llvm_unreachable("buffer end hit before '*/' was seen");
				251	}
				252	} // unnamed namespace
				253
				254	void Lexer::lexCommentText(Token &T) {
				255	assert(CommentState == LCS_InsideBCPLComment \|\|
				256	CommentState == LCS_InsideCComment);
				257
				258	switch (State) {
				259	case LS_Normal:
				260	break;
				261	case LS_VerbatimBlockFirstLine:
				262	lexVerbatimBlockFirstLine(T);
				263	return;
				264	case LS_VerbatimBlockBody:
				265	lexVerbatimBlockBody(T);
				266	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	267	case LS_VerbatimLineText:
				268	lexVerbatimLineText(T);
				269	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	270	case LS_HTMLOpenTag:
				271	lexHTMLOpenTag(T);
				272	return;
				273	}
				274
				275	assert(State == LS_Normal);
				276
				277	const char *TokenPtr = BufferPtr;
				278	assert(TokenPtr < CommentEnd);
				279	while (TokenPtr != CommentEnd) {
				280	switch(*TokenPtr) {
				281	case '\\':
				282	case '@': {
				283	TokenPtr++;
				284	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	285	StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	286	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	287	T.setText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	288	return;
				289	}
				290	char C = *TokenPtr;
				291	switch (C) {
				292	default:
				293	break;
				294
				295	case '\\': case '@': case '&': case '$':
				296	case '#': case '<': case '>': case '%':
				297	case '\"': case '.': case ':':
				298	// This is one of \\ \@ \& \$ etc escape sequences.
				299	TokenPtr++;
				300	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				301	// This is the \:: escape sequence.
				302	TokenPtr++;
				303	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	304	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	305	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	306	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	307	return;
				308	}
				309
				310	// Don't make zero-length commands.
				311	if (!isCommandNameCharacter(*TokenPtr)) {
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	312	StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	313	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	314	T.setText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	315	return;
				316	}
				317
				318	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				319	unsigned Length = TokenPtr - (BufferPtr + 1);
				320
				321	// Hardcoded support for lexing LaTeX formula commands
				322	// \f$ \f[ \f] \f{ \f} as a single command.
				323	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				324	C = *TokenPtr;
				325	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				326	TokenPtr++;
				327	Length++;
				328	}
				329	}
				330
				331	const StringRef CommandName(BufferPtr + 1, Length);
				332	StringRef EndName;
				333
				334	if (isVerbatimBlockCommand(CommandName, EndName)) {
				335	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
				336	return;
				337	}
				338	if (isVerbatimLineCommand(CommandName)) {
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	339	setupAndLexVerbatimLine(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	340	return;
				341	}
				342	formTokenWithChars(T, TokenPtr, tok::command);
				343	T.setCommandName(CommandName);
				344	return;
				345	}
				346
				347	case '<': {
				348	TokenPtr++;
				349	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	350	StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	351	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	352	T.setText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	353	return;
				354	}
				355	const char C = *TokenPtr;
				356	if (isHTMLIdentifierCharacter(C))
				357	setupAndLexHTMLOpenTag(T);
				358	else if (C == '/')
				359	lexHTMLCloseTag(T);
Dmitri Gribenko	5676d32	2012-06-27 23:28:29 +0000	[diff] [blame]	360	else {
				361	StringRef Text(BufferPtr, TokenPtr - BufferPtr);
				362	formTokenWithChars(T, TokenPtr, tok::text);
				363	T.setText(Text);
				364	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	365	return;
				366	}
				367
				368	case '\n':
				369	case '\r':
				370	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				371	formTokenWithChars(T, TokenPtr, tok::newline);
				372
				373	if (CommentState == LCS_InsideCComment)
				374	skipLineStartingDecorations();
				375	return;
				376
				377	default: {
				378	while (true) {
				379	TokenPtr++;
				380	if (TokenPtr == CommentEnd)
				381	break;
				382	char C = *TokenPtr;
				383	if(C == '\n' \|\| C == '\r' \|\|
				384	C == '\\' \|\| C == '@' \|\| C == '<')
				385	break;
				386	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	387	StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	388	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	389	T.setText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	390	return;
				391	}
				392	}
				393	}
				394	}
				395
				396	void Lexer::setupAndLexVerbatimBlock(Token &T,
				397	const char *TextBegin,
				398	char Marker, StringRef EndName) {
				399	VerbatimBlockEndCommandName.clear();
				400	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
				401	VerbatimBlockEndCommandName.append(EndName);
				402
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	403	StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	404	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	405	T.setVerbatimBlockName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	406
				407	State = LS_VerbatimBlockFirstLine;
				408	}
				409
				410	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
				411	assert(BufferPtr < CommentEnd);
				412
				413	// FIXME: It would be better to scan the text once, finding either the block
				414	// end command or newline.
				415	//
				416	// Extract current line.
				417	const char *Newline = findNewline(BufferPtr, CommentEnd);
				418	StringRef Line(BufferPtr, Newline - BufferPtr);
				419
				420	// Look for end command in current line.
				421	size_t Pos = Line.find(VerbatimBlockEndCommandName);
				422	const char *NextLine;
				423	if (Pos == StringRef::npos) {
				424	// Current line is completely verbatim.
				425	NextLine = skipNewline(Newline, CommentEnd);
				426	} else if (Pos == 0) {
				427	// Current line contains just an end command.
				428	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	429	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	430	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	431	T.setVerbatimBlockName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	432	State = LS_Normal;
				433	return;
				434	} else {
				435	// There is some text, followed by end command. Extract text first.
				436	NextLine = BufferPtr + Pos;
				437	}
				438
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	439	StringRef Text(BufferPtr, NextLine - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	440	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	441	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	442
				443	State = LS_VerbatimBlockBody;
				444	}
				445
				446	void Lexer::lexVerbatimBlockBody(Token &T) {
				447	assert(State == LS_VerbatimBlockBody);
				448
				449	if (CommentState == LCS_InsideCComment)
				450	skipLineStartingDecorations();
				451
				452	lexVerbatimBlockFirstLine(T);
				453	}
				454
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	455	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) {
				456	const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
				457	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
				458	T.setVerbatimLineName(Name);
				459
				460	State = LS_VerbatimLineText;
				461	}
				462
				463	void Lexer::lexVerbatimLineText(Token &T) {
				464	assert(State == LS_VerbatimLineText);
				465
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	466	// Extract current line.
				467	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	468	const StringRef Text(BufferPtr, Newline - BufferPtr);
				469	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	470	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	471
				472	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	473	}
				474
				475	void Lexer::setupAndLexHTMLOpenTag(Token &T) {
				476	assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
				477	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
				478
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	479	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	480	formTokenWithChars(T, TagNameEnd, tok::html_tag_open);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	481	T.setHTMLTagOpenName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	482
				483	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				484
				485	if (BufferPtr != CommentEnd && *BufferPtr == '>') {
				486	BufferPtr++;
				487	return;
				488	}
				489
				490	if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
				491	State = LS_HTMLOpenTag;
				492	}
				493
				494	void Lexer::lexHTMLOpenTag(Token &T) {
				495	assert(State == LS_HTMLOpenTag);
				496
				497	const char *TokenPtr = BufferPtr;
				498	char C = *TokenPtr;
				499	if (isHTMLIdentifierCharacter(C)) {
				500	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	501	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	502	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	503	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	504	} else {
				505	switch (C) {
				506	case '=':
				507	TokenPtr++;
				508	formTokenWithChars(T, TokenPtr, tok::html_equals);
				509	break;
				510	case '\"':
				511	case '\'': {
				512	const char *OpenQuote = TokenPtr;
				513	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				514	const char *ClosingQuote = TokenPtr;
				515	if (TokenPtr != CommentEnd) // Skip closing quote.
				516	TokenPtr++;
				517	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				518	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				519	ClosingQuote - (OpenQuote + 1)));
				520	break;
				521	}
				522	case '>':
				523	TokenPtr++;
				524	formTokenWithChars(T, TokenPtr, tok::html_greater);
				525	break;
				526	}
				527	}
				528
				529	// Now look ahead and return to normal state if we don't see any HTML tokens
				530	// ahead.
				531	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				532	if (BufferPtr == CommentEnd) {
				533	State = LS_Normal;
				534	return;
				535	}
				536
				537	C = *BufferPtr;
				538	if (!isHTMLIdentifierCharacter(C) &&
				539	C != '=' && C != '\"' && C != '\'' && C != '>') {
				540	State = LS_Normal;
				541	return;
				542	}
				543	}
				544
				545	void Lexer::lexHTMLCloseTag(Token &T) {
				546	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				547
				548	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				549	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
				550
				551	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
				552	if (End != CommentEnd && *End == '>')
				553	End++;
				554
				555	formTokenWithChars(T, End, tok::html_tag_close);
				556	T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
				557	}
				558
				559	Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
				560	const char BufferStart, const char BufferEnd):
				561	BufferStart(BufferStart), BufferEnd(BufferEnd),
				562	FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
				563	CommentState(LCS_BeforeComment), State(LS_Normal) {
				564	}
				565
				566	void Lexer::lex(Token &T) {
				567	again:
				568	switch (CommentState) {
				569	case LCS_BeforeComment:
				570	if (BufferPtr == BufferEnd) {
				571	formTokenWithChars(T, BufferPtr, tok::eof);
				572	return;
				573	}
				574
				575	assert(*BufferPtr == '/');
				576	BufferPtr++; // Skip first slash.
				577	switch(*BufferPtr) {
				578	case '/': { // BCPL comment.
				579	BufferPtr++; // Skip second slash.
				580
				581	if (BufferPtr != BufferEnd) {
				582	// Skip Doxygen magic marker, if it is present.
				583	// It might be missing because of a typo //< or /*<, or because we
				584	// merged this non-Doxygen comment into a bunch of Doxygen comments
				585	// around it: /** ... / / ... / /* ... */
				586	const char C = *BufferPtr;
				587	if (C == '/' \|\| C == '!')
				588	BufferPtr++;
				589	}
				590
				591	// Skip less-than symbol that marks trailing comments.
				592	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				593	// are frequent typos.
				594	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				595	BufferPtr++;
				596
				597	CommentState = LCS_InsideBCPLComment;
				598	State = LS_Normal;
				599	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				600	goto again;
				601	}
				602	case '*': { // C comment.
				603	BufferPtr++; // Skip star.
				604
				605	// Skip Doxygen magic marker.
				606	const char C = *BufferPtr;
				607	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				608	BufferPtr++;
				609
				610	// Skip less-than symbol that marks trailing comments.
				611	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				612	BufferPtr++;
				613
				614	CommentState = LCS_InsideCComment;
				615	State = LS_Normal;
				616	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				617	goto again;
				618	}
				619	default:
				620	llvm_unreachable("second character of comment should be '/' or '*'");
				621	}
				622
				623	case LCS_BetweenComments: {
				624	// Consecutive comments are extracted only if there is only whitespace
				625	// between them. So we can search for the start of the next comment.
				626	const char *EndWhitespace = BufferPtr;
				627	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				628	EndWhitespace++;
				629
				630	// Turn any whitespace between comments (and there is only whitespace
				631	// between them) into a newline. We have two newlines between comments
				632	// in total (first one was synthesized after a comment).
				633	formTokenWithChars(T, EndWhitespace, tok::newline);
				634
				635	CommentState = LCS_BeforeComment;
				636	break;
				637	}
				638
				639	case LCS_InsideBCPLComment:
				640	case LCS_InsideCComment:
				641	if (BufferPtr != CommentEnd) {
				642	lexCommentText(T);
				643	break;
				644	} else {
				645	// Skip C comment closing sequence.
				646	if (CommentState == LCS_InsideCComment) {
				647	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				648	BufferPtr += 2;
				649	assert(BufferPtr <= BufferEnd);
				650
				651	// Synthenize newline just after the C comment, regardless if there is
				652	// actually a newline.
				653	formTokenWithChars(T, BufferPtr, tok::newline);
				654
				655	CommentState = LCS_BetweenComments;
				656	break;
				657	} else {
				658	// Don't synthesized a newline after BCPL comment.
				659	CommentState = LCS_BetweenComments;
				660	goto again;
				661	}
				662	}
				663	}
				664	}
				665
				666	StringRef Lexer::getSpelling(const Token &Tok,
				667	const SourceManager &SourceMgr,
				668	bool *Invalid) const {
				669	SourceLocation Loc = Tok.getLocation();
				670	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				671
				672	bool InvalidTemp = false;
				673	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				674	if (InvalidTemp) {
				675	*Invalid = true;
				676	return StringRef();
				677	}
				678
				679	const char *Begin = File.data() + LocInfo.second;
				680	return StringRef(Begin, Tok.getLength());
				681	}
				682
				683	void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) {
				684	VerbatimBlockCommand VBC;
				685	VBC.BeginName = BeginName;
				686	VBC.EndName = EndName;
				687	VerbatimBlockCommands.push_back(VBC);
				688	}
				689
				690	void Lexer::addVerbatimLineCommand(StringRef Name) {
				691	VerbatimLineCommand VLC;
				692	VLC.Name = Name;
				693	VerbatimLineCommands.push_back(VLC);
				694	}
				695
				696	} // end namespace comments
				697	} // end namespace clang
				698