Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: 82efac64ff5f5b46f5e968248ee62db007c67ac6 [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame^]	2	#include "clang/Lex/LexDiagnostic.h"
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	3	#include "clang/AST/CommentCommandTraits.h"
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	4	#include "clang/Basic/CharInfo.h"
Dmitri Gribenko	c934dfe	2013-01-19 22:06:05 +0000	[diff] [blame]	5	#include "llvm/ADT/StringExtras.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	6	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	7	#include "llvm/Support/ConvertUTF.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	8	#include "llvm/Support/ErrorHandling.h"
				9
				10	namespace clang {
				11	namespace comments {
				12
				13	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				14	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				15	Loc.dump(SM);
				16	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				17	}
				18
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	19	static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	20	return isLetter(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	21	}
				22
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	23	static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	24	return isDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	25	}
				26
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	27	static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	28	return isHexDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	29	}
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	30
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	31	static inline StringRef convertCodePointToUTF8(
				32	llvm::BumpPtrAllocator &Allocator,
				33	unsigned CodePoint) {
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	34	char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
				35	char *ResolvedPtr = Resolved;
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	36	if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	37	return StringRef(Resolved, ResolvedPtr - Resolved);
				38	else
				39	return StringRef();
				40	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	41
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	42	namespace {
				43
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	44	#include "clang/AST/CommentHTMLTags.inc"
				45	#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
				46
				47	} // unnamed namespace
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	48
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	49	StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	50	// Fast path, first check a few most widely used named character references.
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	51	return llvm::StringSwitch<StringRef>(Name)
				52	.Case("amp", "&")
				53	.Case("lt", "<")
				54	.Case("gt", ">")
				55	.Case("quot", "\"")
				56	.Case("apos", "\'")
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	57	// Slow path.
				58	.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	59	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	60
				61	StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
				62	unsigned CodePoint = 0;
				63	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				64	assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
				65	CodePoint *= 10;
				66	CodePoint += Name[i] - '0';
				67	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	68	return convertCodePointToUTF8(Allocator, CodePoint);
				69	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	70
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	71	StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
				72	unsigned CodePoint = 0;
				73	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				74	CodePoint *= 16;
				75	const char C = Name[i];
				76	assert(isHTMLHexCharacterReferenceCharacter(C));
				77	CodePoint += llvm::hexDigitValue(C);
				78	}
				79	return convertCodePointToUTF8(Allocator, CodePoint);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	80	}
				81
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	82	void Lexer::skipLineStartingDecorations() {
				83	// This function should be called only for C comments
				84	assert(CommentState == LCS_InsideCComment);
				85
				86	if (BufferPtr == CommentEnd)
				87	return;
				88
				89	switch (*BufferPtr) {
				90	case ' ':
				91	case '\t':
				92	case '\f':
				93	case '\v': {
				94	const char *NewBufferPtr = BufferPtr;
				95	NewBufferPtr++;
				96	if (NewBufferPtr == CommentEnd)
				97	return;
				98
				99	char C = *NewBufferPtr;
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	100	while (isHorizontalWhitespace(C)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	101	NewBufferPtr++;
				102	if (NewBufferPtr == CommentEnd)
				103	return;
				104	C = *NewBufferPtr;
				105	}
				106	if (C == '*')
				107	BufferPtr = NewBufferPtr + 1;
				108	break;
				109	}
				110	case '*':
				111	BufferPtr++;
				112	break;
				113	}
				114	}
				115
				116	namespace {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	117	/// Returns pointer to the first newline character in the string.
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	118	const char findNewline(const char BufferPtr, const char *BufferEnd) {
				119	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	120	if (isVerticalWhitespace(*BufferPtr))
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	121	return BufferPtr;
				122	}
				123	return BufferEnd;
				124	}
				125
				126	const char skipNewline(const char BufferPtr, const char *BufferEnd) {
				127	if (BufferPtr == BufferEnd)
				128	return BufferPtr;
				129
				130	if (*BufferPtr == '\n')
				131	BufferPtr++;
				132	else {
				133	assert(*BufferPtr == '\r');
				134	BufferPtr++;
				135	if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				136	BufferPtr++;
				137	}
				138	return BufferPtr;
				139	}
				140
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	141	const char skipNamedCharacterReference(const char BufferPtr,
				142	const char *BufferEnd) {
				143	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				144	if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
				145	return BufferPtr;
				146	}
				147	return BufferEnd;
				148	}
				149
				150	const char skipDecimalCharacterReference(const char BufferPtr,
				151	const char *BufferEnd) {
				152	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				153	if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
				154	return BufferPtr;
				155	}
				156	return BufferEnd;
				157	}
				158
				159	const char skipHexCharacterReference(const char BufferPtr,
				160	const char *BufferEnd) {
				161	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				162	if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
				163	return BufferPtr;
				164	}
				165	return BufferEnd;
				166	}
				167
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	168	bool isHTMLIdentifierStartingCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	169	return isLetter(C);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	170	}
				171
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	172	bool isHTMLIdentifierCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	173	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	174	}
				175
				176	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				177	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				178	if (!isHTMLIdentifierCharacter(*BufferPtr))
				179	return BufferPtr;
				180	}
				181	return BufferEnd;
				182	}
				183
				184	/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
				185	/// string allowed.
				186	///
				187	/// Returns pointer to closing quote.
				188	const char skipHTMLQuotedString(const char BufferPtr, const char *BufferEnd)
				189	{
				190	const char Quote = *BufferPtr;
				191	assert(Quote == '\"' \|\| Quote == '\'');
				192
				193	BufferPtr++;
				194	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				195	const char C = *BufferPtr;
				196	if (C == Quote && BufferPtr[-1] != '\\')
				197	return BufferPtr;
				198	}
				199	return BufferEnd;
				200	}
				201
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	202	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				203	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				204	if (!isWhitespace(*BufferPtr))
				205	return BufferPtr;
				206	}
				207	return BufferEnd;
				208	}
				209
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	210	bool isWhitespace(const char BufferPtr, const char BufferEnd) {
				211	return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
				212	}
				213
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	214	bool isCommandNameStartCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	215	return isLetter(C);
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	216	}
				217
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	218	bool isCommandNameCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	219	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	220	}
				221
				222	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				223	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				224	if (!isCommandNameCharacter(*BufferPtr))
				225	return BufferPtr;
				226	}
				227	return BufferEnd;
				228	}
				229
				230	/// Return the one past end pointer for BCPL comments.
				231	/// Handles newlines escaped with backslash or trigraph for backslahs.
				232	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				233	const char *CurPtr = BufferPtr;
				234	while (CurPtr != BufferEnd) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	235	while (!isVerticalWhitespace(*CurPtr)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	236	CurPtr++;
				237	if (CurPtr == BufferEnd)
				238	return BufferEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	239	}
				240	// We found a newline, check if it is escaped.
				241	const char *EscapePtr = CurPtr - 1;
				242	while(isHorizontalWhitespace(*EscapePtr))
				243	EscapePtr--;
				244
				245	if (*EscapePtr == '\\' \|\|
				246	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				247	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				248	// We found an escaped newline.
				249	CurPtr = skipNewline(CurPtr, BufferEnd);
				250	} else
				251	return CurPtr; // Not an escaped newline.
				252	}
				253	return BufferEnd;
				254	}
				255
				256	/// Return the one past end pointer for C comments.
				257	/// Very dumb, does not handle escaped newlines or trigraphs.
				258	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				259	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				260	if (BufferPtr == '') {
				261	assert(BufferPtr + 1 != BufferEnd);
				262	if (*(BufferPtr + 1) == '/')
				263	return BufferPtr;
				264	}
				265	}
				266	llvm_unreachable("buffer end hit before '*/' was seen");
				267	}
				268	} // unnamed namespace
				269
				270	void Lexer::lexCommentText(Token &T) {
				271	assert(CommentState == LCS_InsideBCPLComment \|\|
				272	CommentState == LCS_InsideCComment);
				273
				274	switch (State) {
				275	case LS_Normal:
				276	break;
				277	case LS_VerbatimBlockFirstLine:
				278	lexVerbatimBlockFirstLine(T);
				279	return;
				280	case LS_VerbatimBlockBody:
				281	lexVerbatimBlockBody(T);
				282	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	283	case LS_VerbatimLineText:
				284	lexVerbatimLineText(T);
				285	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	286	case LS_HTMLStartTag:
				287	lexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	288	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	289	case LS_HTMLEndTag:
				290	lexHTMLEndTag(T);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	291	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	292	}
				293
				294	assert(State == LS_Normal);
				295
				296	const char *TokenPtr = BufferPtr;
				297	assert(TokenPtr < CommentEnd);
				298	while (TokenPtr != CommentEnd) {
				299	switch(*TokenPtr) {
				300	case '\\':
				301	case '@': {
Dmitri Gribenko	808383d	2013-03-04 23:06:15 +0000	[diff] [blame]	302	// Commands that start with a backslash and commands that start with
				303	// 'at' have equivalent semantics. But we keep information about the
				304	// exact syntax in AST for comments.
				305	tok::TokenKind CommandKind =
				306	(*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	307	TokenPtr++;
				308	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	309	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	310	return;
				311	}
				312	char C = *TokenPtr;
				313	switch (C) {
				314	default:
				315	break;
				316
				317	case '\\': case '@': case '&': case '$':
				318	case '#': case '<': case '>': case '%':
				319	case '\"': case '.': case ':':
				320	// This is one of \\ \@ \& \$ etc escape sequences.
				321	TokenPtr++;
				322	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				323	// This is the \:: escape sequence.
				324	TokenPtr++;
				325	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	326	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	327	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	328	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	329	return;
				330	}
				331
				332	// Don't make zero-length commands.
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	333	if (!isCommandNameStartCharacter(*TokenPtr)) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	334	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	335	return;
				336	}
				337
				338	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				339	unsigned Length = TokenPtr - (BufferPtr + 1);
				340
				341	// Hardcoded support for lexing LaTeX formula commands
				342	// \f$ \f[ \f] \f{ \f} as a single command.
				343	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				344	C = *TokenPtr;
				345	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				346	TokenPtr++;
				347	Length++;
				348	}
				349	}
				350
				351	const StringRef CommandName(BufferPtr + 1, Length);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	352
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	353	const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
				354	if (!Info) {
				355	formTokenWithChars(T, TokenPtr, tok::unknown_command);
				356	T.setUnknownCommandName(CommandName);
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame^]	357	Diag(T.getLocation(),
				358	diag::warn_unknown_comment_command_name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	359	return;
				360	}
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	361	if (Info->IsVerbatimBlockCommand) {
				362	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
				363	return;
				364	}
				365	if (Info->IsVerbatimLineCommand) {
				366	setupAndLexVerbatimLine(T, TokenPtr, Info);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	367	return;
				368	}
Dmitri Gribenko	808383d	2013-03-04 23:06:15 +0000	[diff] [blame]	369	formTokenWithChars(T, TokenPtr, CommandKind);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	370	T.setCommandID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	371	return;
				372	}
				373
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	374	case '&':
				375	lexHTMLCharacterReference(T);
				376	return;
				377
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	378	case '<': {
				379	TokenPtr++;
				380	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	381	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	382	return;
				383	}
				384	const char C = *TokenPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	385	if (isHTMLIdentifierStartingCharacter(C))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	386	setupAndLexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	387	else if (C == '/')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	388	setupAndLexHTMLEndTag(T);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	389	else
				390	formTextToken(T, TokenPtr);
				391
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	392	return;
				393	}
				394
				395	case '\n':
				396	case '\r':
				397	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				398	formTokenWithChars(T, TokenPtr, tok::newline);
				399
				400	if (CommentState == LCS_InsideCComment)
				401	skipLineStartingDecorations();
				402	return;
				403
				404	default: {
Dmitri Gribenko	aa7dbaf	2012-12-30 19:45:46 +0000	[diff] [blame]	405	size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
				406	find_first_of("\n\r\\@&<");
				407	if (End != StringRef::npos)
				408	TokenPtr += End;
				409	else
				410	TokenPtr = CommentEnd;
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	411	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	412	return;
				413	}
				414	}
				415	}
				416	}
				417
				418	void Lexer::setupAndLexVerbatimBlock(Token &T,
				419	const char *TextBegin,
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	420	char Marker, const CommandInfo *Info) {
				421	assert(Info->IsVerbatimBlockCommand);
				422
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	423	VerbatimBlockEndCommandName.clear();
				424	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	425	VerbatimBlockEndCommandName.append(Info->EndCommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	426
				427	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	428	T.setVerbatimBlockID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	429
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	430	// If there is a newline following the verbatim opening command, skip the
				431	// newline so that we don't create an tok::verbatim_block_line with empty
				432	// text content.
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	433	if (BufferPtr != CommentEnd &&
				434	isVerticalWhitespace(*BufferPtr)) {
				435	BufferPtr = skipNewline(BufferPtr, CommentEnd);
				436	State = LS_VerbatimBlockBody;
				437	return;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	438	}
				439
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	440	State = LS_VerbatimBlockFirstLine;
				441	}
				442
				443	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	444	again:
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	445	assert(BufferPtr < CommentEnd);
				446
				447	// FIXME: It would be better to scan the text once, finding either the block
				448	// end command or newline.
				449	//
				450	// Extract current line.
				451	const char *Newline = findNewline(BufferPtr, CommentEnd);
				452	StringRef Line(BufferPtr, Newline - BufferPtr);
				453
				454	// Look for end command in current line.
				455	size_t Pos = Line.find(VerbatimBlockEndCommandName);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	456	const char *TextEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	457	const char *NextLine;
				458	if (Pos == StringRef::npos) {
				459	// Current line is completely verbatim.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	460	TextEnd = Newline;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	461	NextLine = skipNewline(Newline, CommentEnd);
				462	} else if (Pos == 0) {
				463	// Current line contains just an end command.
				464	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	465	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	466	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	467	T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	468	State = LS_Normal;
				469	return;
				470	} else {
				471	// There is some text, followed by end command. Extract text first.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	472	TextEnd = BufferPtr + Pos;
				473	NextLine = TextEnd;
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	474	// If there is only whitespace before end command, skip whitespace.
				475	if (isWhitespace(BufferPtr, TextEnd)) {
				476	BufferPtr = TextEnd;
				477	goto again;
				478	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	479	}
				480
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	481	StringRef Text(BufferPtr, TextEnd - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	482	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	483	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	484
				485	State = LS_VerbatimBlockBody;
				486	}
				487
				488	void Lexer::lexVerbatimBlockBody(Token &T) {
				489	assert(State == LS_VerbatimBlockBody);
				490
				491	if (CommentState == LCS_InsideCComment)
				492	skipLineStartingDecorations();
				493
				494	lexVerbatimBlockFirstLine(T);
				495	}
				496
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	497	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
				498	const CommandInfo *Info) {
				499	assert(Info->IsVerbatimLineCommand);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	500	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	501	T.setVerbatimLineID(Info->getID());
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	502
				503	State = LS_VerbatimLineText;
				504	}
				505
				506	void Lexer::lexVerbatimLineText(Token &T) {
				507	assert(State == LS_VerbatimLineText);
				508
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	509	// Extract current line.
				510	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	511	const StringRef Text(BufferPtr, Newline - BufferPtr);
				512	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	513	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	514
				515	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	516	}
				517
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	518	void Lexer::lexHTMLCharacterReference(Token &T) {
				519	const char *TokenPtr = BufferPtr;
				520	assert(*TokenPtr == '&');
				521	TokenPtr++;
				522	if (TokenPtr == CommentEnd) {
				523	formTextToken(T, TokenPtr);
				524	return;
				525	}
				526	const char *NamePtr;
				527	bool isNamed = false;
				528	bool isDecimal = false;
				529	char C = *TokenPtr;
				530	if (isHTMLNamedCharacterReferenceCharacter(C)) {
				531	NamePtr = TokenPtr;
				532	TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
				533	isNamed = true;
				534	} else if (C == '#') {
				535	TokenPtr++;
				536	if (TokenPtr == CommentEnd) {
				537	formTextToken(T, TokenPtr);
				538	return;
				539	}
				540	C = *TokenPtr;
				541	if (isHTMLDecimalCharacterReferenceCharacter(C)) {
				542	NamePtr = TokenPtr;
				543	TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
				544	isDecimal = true;
				545	} else if (C == 'x' \|\| C == 'X') {
				546	TokenPtr++;
				547	NamePtr = TokenPtr;
				548	TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
				549	} else {
				550	formTextToken(T, TokenPtr);
				551	return;
				552	}
				553	} else {
				554	formTextToken(T, TokenPtr);
				555	return;
				556	}
				557	if (NamePtr == TokenPtr \|\| TokenPtr == CommentEnd \|\|
				558	*TokenPtr != ';') {
				559	formTextToken(T, TokenPtr);
				560	return;
				561	}
				562	StringRef Name(NamePtr, TokenPtr - NamePtr);
				563	TokenPtr++; // Skip semicolon.
				564	StringRef Resolved;
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	565	if (isNamed)
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	566	Resolved = resolveHTMLNamedCharacterReference(Name);
				567	else if (isDecimal)
				568	Resolved = resolveHTMLDecimalCharacterReference(Name);
				569	else
				570	Resolved = resolveHTMLHexCharacterReference(Name);
				571
				572	if (Resolved.empty()) {
				573	formTextToken(T, TokenPtr);
				574	return;
				575	}
				576	formTokenWithChars(T, TokenPtr, tok::text);
				577	T.setText(Resolved);
				578	return;
				579	}
				580
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	581	void Lexer::setupAndLexHTMLStartTag(Token &T) {
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	582	assert(BufferPtr[0] == '<' &&
				583	isHTMLIdentifierStartingCharacter(BufferPtr[1]));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	584	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	585	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	586	if (!isHTMLTagName(Name)) {
				587	formTextToken(T, TagNameEnd);
				588	return;
				589	}
				590
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	591	formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
				592	T.setHTMLTagStartName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	593
				594	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				595
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	596	const char C = *BufferPtr;
				597	if (BufferPtr != CommentEnd &&
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	598	(C == '>' \|\| C == '/' \|\| isHTMLIdentifierStartingCharacter(C)))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	599	State = LS_HTMLStartTag;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	600	}
				601
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	602	void Lexer::lexHTMLStartTag(Token &T) {
				603	assert(State == LS_HTMLStartTag);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	604
				605	const char *TokenPtr = BufferPtr;
				606	char C = *TokenPtr;
				607	if (isHTMLIdentifierCharacter(C)) {
				608	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	609	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	610	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	611	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	612	} else {
				613	switch (C) {
				614	case '=':
				615	TokenPtr++;
				616	formTokenWithChars(T, TokenPtr, tok::html_equals);
				617	break;
				618	case '\"':
				619	case '\'': {
				620	const char *OpenQuote = TokenPtr;
				621	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				622	const char *ClosingQuote = TokenPtr;
				623	if (TokenPtr != CommentEnd) // Skip closing quote.
				624	TokenPtr++;
				625	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				626	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				627	ClosingQuote - (OpenQuote + 1)));
				628	break;
				629	}
				630	case '>':
				631	TokenPtr++;
				632	formTokenWithChars(T, TokenPtr, tok::html_greater);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	633	State = LS_Normal;
				634	return;
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	635	case '/':
				636	TokenPtr++;
				637	if (TokenPtr != CommentEnd && *TokenPtr == '>') {
				638	TokenPtr++;
				639	formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	640	} else
				641	formTextToken(T, TokenPtr);
				642
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	643	State = LS_Normal;
				644	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	645	}
				646	}
				647
				648	// Now look ahead and return to normal state if we don't see any HTML tokens
				649	// ahead.
				650	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				651	if (BufferPtr == CommentEnd) {
				652	State = LS_Normal;
				653	return;
				654	}
				655
				656	C = *BufferPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	657	if (!isHTMLIdentifierStartingCharacter(C) &&
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	658	C != '=' && C != '\"' && C != '\'' && C != '>') {
				659	State = LS_Normal;
				660	return;
				661	}
				662	}
				663
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	664	void Lexer::setupAndLexHTMLEndTag(Token &T) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	665	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				666
				667	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				668	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	669	StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
				670	if (!isHTMLTagName(Name)) {
				671	formTextToken(T, TagNameEnd);
				672	return;
				673	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	674
				675	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	676
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	677	formTokenWithChars(T, End, tok::html_end_tag);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	678	T.setHTMLTagEndName(Name);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	679
				680	if (BufferPtr != CommentEnd && *BufferPtr == '>')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	681	State = LS_HTMLEndTag;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	682	}
				683
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	684	void Lexer::lexHTMLEndTag(Token &T) {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	685	assert(BufferPtr != CommentEnd && *BufferPtr == '>');
				686
				687	formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
				688	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	689	}
				690
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame^]	691	Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
				692	const CommandTraits &Traits,
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	693	SourceLocation FileLoc,
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	694	const char BufferStart, const char BufferEnd):
Fariborz Jahanian	ad6fd9f	2013-05-03 23:15:20 +0000	[diff] [blame^]	695	Allocator(Allocator), Diags(Diags), Traits(Traits),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	696	BufferStart(BufferStart), BufferEnd(BufferEnd),
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	697	FileLoc(FileLoc), BufferPtr(BufferStart),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	698	CommentState(LCS_BeforeComment), State(LS_Normal) {
				699	}
				700
				701	void Lexer::lex(Token &T) {
				702	again:
				703	switch (CommentState) {
				704	case LCS_BeforeComment:
				705	if (BufferPtr == BufferEnd) {
				706	formTokenWithChars(T, BufferPtr, tok::eof);
				707	return;
				708	}
				709
				710	assert(*BufferPtr == '/');
				711	BufferPtr++; // Skip first slash.
				712	switch(*BufferPtr) {
				713	case '/': { // BCPL comment.
				714	BufferPtr++; // Skip second slash.
				715
				716	if (BufferPtr != BufferEnd) {
				717	// Skip Doxygen magic marker, if it is present.
				718	// It might be missing because of a typo //< or /*<, or because we
				719	// merged this non-Doxygen comment into a bunch of Doxygen comments
				720	// around it: /** ... / / ... / /* ... */
				721	const char C = *BufferPtr;
				722	if (C == '/' \|\| C == '!')
				723	BufferPtr++;
				724	}
				725
				726	// Skip less-than symbol that marks trailing comments.
				727	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				728	// are frequent typos.
				729	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				730	BufferPtr++;
				731
				732	CommentState = LCS_InsideBCPLComment;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	733	if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
				734	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	735	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				736	goto again;
				737	}
				738	case '*': { // C comment.
				739	BufferPtr++; // Skip star.
				740
				741	// Skip Doxygen magic marker.
				742	const char C = *BufferPtr;
				743	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				744	BufferPtr++;
				745
				746	// Skip less-than symbol that marks trailing comments.
				747	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				748	BufferPtr++;
				749
				750	CommentState = LCS_InsideCComment;
				751	State = LS_Normal;
				752	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				753	goto again;
				754	}
				755	default:
				756	llvm_unreachable("second character of comment should be '/' or '*'");
				757	}
				758
				759	case LCS_BetweenComments: {
				760	// Consecutive comments are extracted only if there is only whitespace
				761	// between them. So we can search for the start of the next comment.
				762	const char *EndWhitespace = BufferPtr;
				763	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				764	EndWhitespace++;
				765
				766	// Turn any whitespace between comments (and there is only whitespace
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	767	// between them -- guaranteed by comment extraction) into a newline. We
				768	// have two newlines between C comments in total (first one was synthesized
				769	// after a comment).
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	770	formTokenWithChars(T, EndWhitespace, tok::newline);
				771
				772	CommentState = LCS_BeforeComment;
				773	break;
				774	}
				775
				776	case LCS_InsideBCPLComment:
				777	case LCS_InsideCComment:
				778	if (BufferPtr != CommentEnd) {
				779	lexCommentText(T);
				780	break;
				781	} else {
				782	// Skip C comment closing sequence.
				783	if (CommentState == LCS_InsideCComment) {
				784	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				785	BufferPtr += 2;
				786	assert(BufferPtr <= BufferEnd);
				787
				788	// Synthenize newline just after the C comment, regardless if there is
				789	// actually a newline.
				790	formTokenWithChars(T, BufferPtr, tok::newline);
				791
				792	CommentState = LCS_BetweenComments;
				793	break;
				794	} else {
				795	// Don't synthesized a newline after BCPL comment.
				796	CommentState = LCS_BetweenComments;
				797	goto again;
				798	}
				799	}
				800	}
				801	}
				802
				803	StringRef Lexer::getSpelling(const Token &Tok,
				804	const SourceManager &SourceMgr,
				805	bool *Invalid) const {
				806	SourceLocation Loc = Tok.getLocation();
				807	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				808
				809	bool InvalidTemp = false;
				810	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				811	if (InvalidTemp) {
				812	*Invalid = true;
				813	return StringRef();
				814	}
				815
				816	const char *Begin = File.data() + LocInfo.second;
				817	return StringRef(Begin, Tok.getLength());
				818	}
				819
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	820	} // end namespace comments
				821	} // end namespace clang
				822