Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: 20180994177ed93580d870b67f75d05552430992 [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	2	#include "clang/AST/CommentCommandTraits.h"
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	3	#include "clang/Basic/CharInfo.h"
Dmitri Gribenko	c934dfe	2013-01-19 22:06:05 +0000	[diff] [blame]	4	#include "llvm/ADT/StringExtras.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	5	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	6	#include "llvm/Support/ConvertUTF.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	7	#include "llvm/Support/ErrorHandling.h"
				8
				9	namespace clang {
				10	namespace comments {
				11
				12	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				13	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				14	Loc.dump(SM);
				15	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				16	}
				17
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	18	static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	19	return isLetter(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	20	}
				21
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	22	static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	23	return isDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	24	}
				25
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	26	static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	27	return isHexDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	28	}
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	29
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	30	static inline StringRef convertCodePointToUTF8(
				31	llvm::BumpPtrAllocator &Allocator,
				32	unsigned CodePoint) {
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	33	char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
				34	char *ResolvedPtr = Resolved;
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	35	if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	36	return StringRef(Resolved, ResolvedPtr - Resolved);
				37	else
				38	return StringRef();
				39	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	40
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	41	namespace {
				42
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	43	#include "clang/AST/CommentHTMLTags.inc"
				44	#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
				45
				46	} // unnamed namespace
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	47
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	48	StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	49	// Fast path, first check a few most widely used named character references.
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	50	return llvm::StringSwitch<StringRef>(Name)
				51	.Case("amp", "&")
				52	.Case("lt", "<")
				53	.Case("gt", ">")
				54	.Case("quot", "\"")
				55	.Case("apos", "\'")
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	56	// Slow path.
				57	.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	58	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	59
				60	StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
				61	unsigned CodePoint = 0;
				62	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				63	assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
				64	CodePoint *= 10;
				65	CodePoint += Name[i] - '0';
				66	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	67	return convertCodePointToUTF8(Allocator, CodePoint);
				68	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	69
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	70	StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
				71	unsigned CodePoint = 0;
				72	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				73	CodePoint *= 16;
				74	const char C = Name[i];
				75	assert(isHTMLHexCharacterReferenceCharacter(C));
				76	CodePoint += llvm::hexDigitValue(C);
				77	}
				78	return convertCodePointToUTF8(Allocator, CodePoint);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	79	}
				80
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	81	void Lexer::skipLineStartingDecorations() {
				82	// This function should be called only for C comments
				83	assert(CommentState == LCS_InsideCComment);
				84
				85	if (BufferPtr == CommentEnd)
				86	return;
				87
				88	switch (*BufferPtr) {
				89	case ' ':
				90	case '\t':
				91	case '\f':
				92	case '\v': {
				93	const char *NewBufferPtr = BufferPtr;
				94	NewBufferPtr++;
				95	if (NewBufferPtr == CommentEnd)
				96	return;
				97
				98	char C = *NewBufferPtr;
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	99	while (isHorizontalWhitespace(C)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	100	NewBufferPtr++;
				101	if (NewBufferPtr == CommentEnd)
				102	return;
				103	C = *NewBufferPtr;
				104	}
				105	if (C == '*')
				106	BufferPtr = NewBufferPtr + 1;
				107	break;
				108	}
				109	case '*':
				110	BufferPtr++;
				111	break;
				112	}
				113	}
				114
				115	namespace {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	116	/// Returns pointer to the first newline character in the string.
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	117	const char findNewline(const char BufferPtr, const char *BufferEnd) {
				118	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	119	if (isVerticalWhitespace(*BufferPtr))
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	120	return BufferPtr;
				121	}
				122	return BufferEnd;
				123	}
				124
				125	const char skipNewline(const char BufferPtr, const char *BufferEnd) {
				126	if (BufferPtr == BufferEnd)
				127	return BufferPtr;
				128
				129	if (*BufferPtr == '\n')
				130	BufferPtr++;
				131	else {
				132	assert(*BufferPtr == '\r');
				133	BufferPtr++;
				134	if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				135	BufferPtr++;
				136	}
				137	return BufferPtr;
				138	}
				139
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	140	const char skipNamedCharacterReference(const char BufferPtr,
				141	const char *BufferEnd) {
				142	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				143	if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
				144	return BufferPtr;
				145	}
				146	return BufferEnd;
				147	}
				148
				149	const char skipDecimalCharacterReference(const char BufferPtr,
				150	const char *BufferEnd) {
				151	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				152	if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
				153	return BufferPtr;
				154	}
				155	return BufferEnd;
				156	}
				157
				158	const char skipHexCharacterReference(const char BufferPtr,
				159	const char *BufferEnd) {
				160	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				161	if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
				162	return BufferPtr;
				163	}
				164	return BufferEnd;
				165	}
				166
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	167	bool isHTMLIdentifierStartingCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	168	return isLetter(C);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	169	}
				170
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	171	bool isHTMLIdentifierCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	172	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	173	}
				174
				175	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				176	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				177	if (!isHTMLIdentifierCharacter(*BufferPtr))
				178	return BufferPtr;
				179	}
				180	return BufferEnd;
				181	}
				182
				183	/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
				184	/// string allowed.
				185	///
				186	/// Returns pointer to closing quote.
				187	const char skipHTMLQuotedString(const char BufferPtr, const char *BufferEnd)
				188	{
				189	const char Quote = *BufferPtr;
				190	assert(Quote == '\"' \|\| Quote == '\'');
				191
				192	BufferPtr++;
				193	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				194	const char C = *BufferPtr;
				195	if (C == Quote && BufferPtr[-1] != '\\')
				196	return BufferPtr;
				197	}
				198	return BufferEnd;
				199	}
				200
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	201	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				202	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				203	if (!isWhitespace(*BufferPtr))
				204	return BufferPtr;
				205	}
				206	return BufferEnd;
				207	}
				208
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	209	bool isWhitespace(const char BufferPtr, const char BufferEnd) {
				210	return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
				211	}
				212
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	213	bool isCommandNameStartCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	214	return isLetter(C);
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	215	}
				216
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	217	bool isCommandNameCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	218	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	219	}
				220
				221	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				222	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				223	if (!isCommandNameCharacter(*BufferPtr))
				224	return BufferPtr;
				225	}
				226	return BufferEnd;
				227	}
				228
				229	/// Return the one past end pointer for BCPL comments.
				230	/// Handles newlines escaped with backslash or trigraph for backslahs.
				231	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				232	const char *CurPtr = BufferPtr;
				233	while (CurPtr != BufferEnd) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	234	while (!isVerticalWhitespace(*CurPtr)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	235	CurPtr++;
				236	if (CurPtr == BufferEnd)
				237	return BufferEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	238	}
				239	// We found a newline, check if it is escaped.
				240	const char *EscapePtr = CurPtr - 1;
				241	while(isHorizontalWhitespace(*EscapePtr))
				242	EscapePtr--;
				243
				244	if (*EscapePtr == '\\' \|\|
				245	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				246	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				247	// We found an escaped newline.
				248	CurPtr = skipNewline(CurPtr, BufferEnd);
				249	} else
				250	return CurPtr; // Not an escaped newline.
				251	}
				252	return BufferEnd;
				253	}
				254
				255	/// Return the one past end pointer for C comments.
				256	/// Very dumb, does not handle escaped newlines or trigraphs.
				257	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				258	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				259	if (BufferPtr == '') {
				260	assert(BufferPtr + 1 != BufferEnd);
				261	if (*(BufferPtr + 1) == '/')
				262	return BufferPtr;
				263	}
				264	}
				265	llvm_unreachable("buffer end hit before '*/' was seen");
				266	}
				267	} // unnamed namespace
				268
				269	void Lexer::lexCommentText(Token &T) {
				270	assert(CommentState == LCS_InsideBCPLComment \|\|
				271	CommentState == LCS_InsideCComment);
				272
				273	switch (State) {
				274	case LS_Normal:
				275	break;
				276	case LS_VerbatimBlockFirstLine:
				277	lexVerbatimBlockFirstLine(T);
				278	return;
				279	case LS_VerbatimBlockBody:
				280	lexVerbatimBlockBody(T);
				281	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	282	case LS_VerbatimLineText:
				283	lexVerbatimLineText(T);
				284	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	285	case LS_HTMLStartTag:
				286	lexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	287	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	288	case LS_HTMLEndTag:
				289	lexHTMLEndTag(T);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	290	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	291	}
				292
				293	assert(State == LS_Normal);
				294
				295	const char *TokenPtr = BufferPtr;
				296	assert(TokenPtr < CommentEnd);
				297	while (TokenPtr != CommentEnd) {
				298	switch(*TokenPtr) {
				299	case '\\':
				300	case '@': {
Fariborz Jahanian	8536fa1	2013-03-02 02:39:57 +0000	[diff] [blame]	301	bool AtCommand = (*TokenPtr == '@');
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	302	TokenPtr++;
				303	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	304	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	305	return;
				306	}
				307	char C = *TokenPtr;
				308	switch (C) {
				309	default:
				310	break;
				311
				312	case '\\': case '@': case '&': case '$':
				313	case '#': case '<': case '>': case '%':
				314	case '\"': case '.': case ':':
				315	// This is one of \\ \@ \& \$ etc escape sequences.
				316	TokenPtr++;
				317	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				318	// This is the \:: escape sequence.
				319	TokenPtr++;
				320	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	321	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	322	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	323	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	324	return;
				325	}
				326
				327	// Don't make zero-length commands.
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	328	if (!isCommandNameStartCharacter(*TokenPtr)) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	329	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	330	return;
				331	}
				332
				333	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				334	unsigned Length = TokenPtr - (BufferPtr + 1);
				335
				336	// Hardcoded support for lexing LaTeX formula commands
				337	// \f$ \f[ \f] \f{ \f} as a single command.
				338	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				339	C = *TokenPtr;
				340	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				341	TokenPtr++;
				342	Length++;
				343	}
				344	}
				345
				346	const StringRef CommandName(BufferPtr + 1, Length);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	347
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	348	const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
				349	if (!Info) {
				350	formTokenWithChars(T, TokenPtr, tok::unknown_command);
				351	T.setUnknownCommandName(CommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	352	return;
				353	}
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	354	if (Info->IsVerbatimBlockCommand) {
				355	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
				356	return;
				357	}
				358	if (Info->IsVerbatimLineCommand) {
				359	setupAndLexVerbatimLine(T, TokenPtr, Info);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	360	return;
				361	}
Fariborz Jahanian	8536fa1	2013-03-02 02:39:57 +0000	[diff] [blame]	362	formTokenWithChars(T, TokenPtr,
				363	(AtCommand ? tok::at_command
				364	: tok::backslash_command));
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	365	T.setCommandID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	366	return;
				367	}
				368
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	369	case '&':
				370	lexHTMLCharacterReference(T);
				371	return;
				372
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	373	case '<': {
				374	TokenPtr++;
				375	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	376	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	377	return;
				378	}
				379	const char C = *TokenPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	380	if (isHTMLIdentifierStartingCharacter(C))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	381	setupAndLexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	382	else if (C == '/')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	383	setupAndLexHTMLEndTag(T);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	384	else
				385	formTextToken(T, TokenPtr);
				386
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	387	return;
				388	}
				389
				390	case '\n':
				391	case '\r':
				392	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				393	formTokenWithChars(T, TokenPtr, tok::newline);
				394
				395	if (CommentState == LCS_InsideCComment)
				396	skipLineStartingDecorations();
				397	return;
				398
				399	default: {
Dmitri Gribenko	aa7dbaf	2012-12-30 19:45:46 +0000	[diff] [blame]	400	size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
				401	find_first_of("\n\r\\@&<");
				402	if (End != StringRef::npos)
				403	TokenPtr += End;
				404	else
				405	TokenPtr = CommentEnd;
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	406	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	407	return;
				408	}
				409	}
				410	}
				411	}
				412
				413	void Lexer::setupAndLexVerbatimBlock(Token &T,
				414	const char *TextBegin,
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	415	char Marker, const CommandInfo *Info) {
				416	assert(Info->IsVerbatimBlockCommand);
				417
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	418	VerbatimBlockEndCommandName.clear();
				419	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	420	VerbatimBlockEndCommandName.append(Info->EndCommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	421
				422	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	423	T.setVerbatimBlockID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	424
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	425	// If there is a newline following the verbatim opening command, skip the
				426	// newline so that we don't create an tok::verbatim_block_line with empty
				427	// text content.
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	428	if (BufferPtr != CommentEnd &&
				429	isVerticalWhitespace(*BufferPtr)) {
				430	BufferPtr = skipNewline(BufferPtr, CommentEnd);
				431	State = LS_VerbatimBlockBody;
				432	return;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	433	}
				434
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	435	State = LS_VerbatimBlockFirstLine;
				436	}
				437
				438	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	439	again:
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	440	assert(BufferPtr < CommentEnd);
				441
				442	// FIXME: It would be better to scan the text once, finding either the block
				443	// end command or newline.
				444	//
				445	// Extract current line.
				446	const char *Newline = findNewline(BufferPtr, CommentEnd);
				447	StringRef Line(BufferPtr, Newline - BufferPtr);
				448
				449	// Look for end command in current line.
				450	size_t Pos = Line.find(VerbatimBlockEndCommandName);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	451	const char *TextEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	452	const char *NextLine;
				453	if (Pos == StringRef::npos) {
				454	// Current line is completely verbatim.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	455	TextEnd = Newline;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	456	NextLine = skipNewline(Newline, CommentEnd);
				457	} else if (Pos == 0) {
				458	// Current line contains just an end command.
				459	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	460	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	461	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	462	T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	463	State = LS_Normal;
				464	return;
				465	} else {
				466	// There is some text, followed by end command. Extract text first.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	467	TextEnd = BufferPtr + Pos;
				468	NextLine = TextEnd;
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	469	// If there is only whitespace before end command, skip whitespace.
				470	if (isWhitespace(BufferPtr, TextEnd)) {
				471	BufferPtr = TextEnd;
				472	goto again;
				473	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	474	}
				475
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	476	StringRef Text(BufferPtr, TextEnd - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	477	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	478	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	479
				480	State = LS_VerbatimBlockBody;
				481	}
				482
				483	void Lexer::lexVerbatimBlockBody(Token &T) {
				484	assert(State == LS_VerbatimBlockBody);
				485
				486	if (CommentState == LCS_InsideCComment)
				487	skipLineStartingDecorations();
				488
				489	lexVerbatimBlockFirstLine(T);
				490	}
				491
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	492	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
				493	const CommandInfo *Info) {
				494	assert(Info->IsVerbatimLineCommand);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	495	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	496	T.setVerbatimLineID(Info->getID());
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	497
				498	State = LS_VerbatimLineText;
				499	}
				500
				501	void Lexer::lexVerbatimLineText(Token &T) {
				502	assert(State == LS_VerbatimLineText);
				503
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	504	// Extract current line.
				505	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	506	const StringRef Text(BufferPtr, Newline - BufferPtr);
				507	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	508	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	509
				510	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	511	}
				512
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	513	void Lexer::lexHTMLCharacterReference(Token &T) {
				514	const char *TokenPtr = BufferPtr;
				515	assert(*TokenPtr == '&');
				516	TokenPtr++;
				517	if (TokenPtr == CommentEnd) {
				518	formTextToken(T, TokenPtr);
				519	return;
				520	}
				521	const char *NamePtr;
				522	bool isNamed = false;
				523	bool isDecimal = false;
				524	char C = *TokenPtr;
				525	if (isHTMLNamedCharacterReferenceCharacter(C)) {
				526	NamePtr = TokenPtr;
				527	TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
				528	isNamed = true;
				529	} else if (C == '#') {
				530	TokenPtr++;
				531	if (TokenPtr == CommentEnd) {
				532	formTextToken(T, TokenPtr);
				533	return;
				534	}
				535	C = *TokenPtr;
				536	if (isHTMLDecimalCharacterReferenceCharacter(C)) {
				537	NamePtr = TokenPtr;
				538	TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
				539	isDecimal = true;
				540	} else if (C == 'x' \|\| C == 'X') {
				541	TokenPtr++;
				542	NamePtr = TokenPtr;
				543	TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
				544	} else {
				545	formTextToken(T, TokenPtr);
				546	return;
				547	}
				548	} else {
				549	formTextToken(T, TokenPtr);
				550	return;
				551	}
				552	if (NamePtr == TokenPtr \|\| TokenPtr == CommentEnd \|\|
				553	*TokenPtr != ';') {
				554	formTextToken(T, TokenPtr);
				555	return;
				556	}
				557	StringRef Name(NamePtr, TokenPtr - NamePtr);
				558	TokenPtr++; // Skip semicolon.
				559	StringRef Resolved;
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	560	if (isNamed)
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	561	Resolved = resolveHTMLNamedCharacterReference(Name);
				562	else if (isDecimal)
				563	Resolved = resolveHTMLDecimalCharacterReference(Name);
				564	else
				565	Resolved = resolveHTMLHexCharacterReference(Name);
				566
				567	if (Resolved.empty()) {
				568	formTextToken(T, TokenPtr);
				569	return;
				570	}
				571	formTokenWithChars(T, TokenPtr, tok::text);
				572	T.setText(Resolved);
				573	return;
				574	}
				575
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	576	void Lexer::setupAndLexHTMLStartTag(Token &T) {
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	577	assert(BufferPtr[0] == '<' &&
				578	isHTMLIdentifierStartingCharacter(BufferPtr[1]));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	579	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	580	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	581	if (!isHTMLTagName(Name)) {
				582	formTextToken(T, TagNameEnd);
				583	return;
				584	}
				585
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	586	formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
				587	T.setHTMLTagStartName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	588
				589	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				590
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	591	const char C = *BufferPtr;
				592	if (BufferPtr != CommentEnd &&
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	593	(C == '>' \|\| C == '/' \|\| isHTMLIdentifierStartingCharacter(C)))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	594	State = LS_HTMLStartTag;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	595	}
				596
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	597	void Lexer::lexHTMLStartTag(Token &T) {
				598	assert(State == LS_HTMLStartTag);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	599
				600	const char *TokenPtr = BufferPtr;
				601	char C = *TokenPtr;
				602	if (isHTMLIdentifierCharacter(C)) {
				603	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	604	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	605	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	606	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	607	} else {
				608	switch (C) {
				609	case '=':
				610	TokenPtr++;
				611	formTokenWithChars(T, TokenPtr, tok::html_equals);
				612	break;
				613	case '\"':
				614	case '\'': {
				615	const char *OpenQuote = TokenPtr;
				616	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				617	const char *ClosingQuote = TokenPtr;
				618	if (TokenPtr != CommentEnd) // Skip closing quote.
				619	TokenPtr++;
				620	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				621	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				622	ClosingQuote - (OpenQuote + 1)));
				623	break;
				624	}
				625	case '>':
				626	TokenPtr++;
				627	formTokenWithChars(T, TokenPtr, tok::html_greater);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	628	State = LS_Normal;
				629	return;
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	630	case '/':
				631	TokenPtr++;
				632	if (TokenPtr != CommentEnd && *TokenPtr == '>') {
				633	TokenPtr++;
				634	formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	635	} else
				636	formTextToken(T, TokenPtr);
				637
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	638	State = LS_Normal;
				639	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	640	}
				641	}
				642
				643	// Now look ahead and return to normal state if we don't see any HTML tokens
				644	// ahead.
				645	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				646	if (BufferPtr == CommentEnd) {
				647	State = LS_Normal;
				648	return;
				649	}
				650
				651	C = *BufferPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	652	if (!isHTMLIdentifierStartingCharacter(C) &&
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	653	C != '=' && C != '\"' && C != '\'' && C != '>') {
				654	State = LS_Normal;
				655	return;
				656	}
				657	}
				658
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	659	void Lexer::setupAndLexHTMLEndTag(Token &T) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	660	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				661
				662	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				663	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	664	StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
				665	if (!isHTMLTagName(Name)) {
				666	formTextToken(T, TagNameEnd);
				667	return;
				668	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	669
				670	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	671
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	672	formTokenWithChars(T, End, tok::html_end_tag);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	673	T.setHTMLTagEndName(Name);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	674
				675	if (BufferPtr != CommentEnd && *BufferPtr == '>')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	676	State = LS_HTMLEndTag;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	677	}
				678
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	679	void Lexer::lexHTMLEndTag(Token &T) {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	680	assert(BufferPtr != CommentEnd && *BufferPtr == '>');
				681
				682	formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
				683	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	684	}
				685
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	686	Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	687	SourceLocation FileLoc,
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	688	const char BufferStart, const char BufferEnd):
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	689	Allocator(Allocator), Traits(Traits),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	690	BufferStart(BufferStart), BufferEnd(BufferEnd),
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	691	FileLoc(FileLoc), BufferPtr(BufferStart),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	692	CommentState(LCS_BeforeComment), State(LS_Normal) {
				693	}
				694
				695	void Lexer::lex(Token &T) {
				696	again:
				697	switch (CommentState) {
				698	case LCS_BeforeComment:
				699	if (BufferPtr == BufferEnd) {
				700	formTokenWithChars(T, BufferPtr, tok::eof);
				701	return;
				702	}
				703
				704	assert(*BufferPtr == '/');
				705	BufferPtr++; // Skip first slash.
				706	switch(*BufferPtr) {
				707	case '/': { // BCPL comment.
				708	BufferPtr++; // Skip second slash.
				709
				710	if (BufferPtr != BufferEnd) {
				711	// Skip Doxygen magic marker, if it is present.
				712	// It might be missing because of a typo //< or /*<, or because we
				713	// merged this non-Doxygen comment into a bunch of Doxygen comments
				714	// around it: /** ... / / ... / /* ... */
				715	const char C = *BufferPtr;
				716	if (C == '/' \|\| C == '!')
				717	BufferPtr++;
				718	}
				719
				720	// Skip less-than symbol that marks trailing comments.
				721	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				722	// are frequent typos.
				723	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				724	BufferPtr++;
				725
				726	CommentState = LCS_InsideBCPLComment;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	727	if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
				728	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	729	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				730	goto again;
				731	}
				732	case '*': { // C comment.
				733	BufferPtr++; // Skip star.
				734
				735	// Skip Doxygen magic marker.
				736	const char C = *BufferPtr;
				737	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				738	BufferPtr++;
				739
				740	// Skip less-than symbol that marks trailing comments.
				741	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				742	BufferPtr++;
				743
				744	CommentState = LCS_InsideCComment;
				745	State = LS_Normal;
				746	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				747	goto again;
				748	}
				749	default:
				750	llvm_unreachable("second character of comment should be '/' or '*'");
				751	}
				752
				753	case LCS_BetweenComments: {
				754	// Consecutive comments are extracted only if there is only whitespace
				755	// between them. So we can search for the start of the next comment.
				756	const char *EndWhitespace = BufferPtr;
				757	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				758	EndWhitespace++;
				759
				760	// Turn any whitespace between comments (and there is only whitespace
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	761	// between them -- guaranteed by comment extraction) into a newline. We
				762	// have two newlines between C comments in total (first one was synthesized
				763	// after a comment).
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	764	formTokenWithChars(T, EndWhitespace, tok::newline);
				765
				766	CommentState = LCS_BeforeComment;
				767	break;
				768	}
				769
				770	case LCS_InsideBCPLComment:
				771	case LCS_InsideCComment:
				772	if (BufferPtr != CommentEnd) {
				773	lexCommentText(T);
				774	break;
				775	} else {
				776	// Skip C comment closing sequence.
				777	if (CommentState == LCS_InsideCComment) {
				778	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				779	BufferPtr += 2;
				780	assert(BufferPtr <= BufferEnd);
				781
				782	// Synthenize newline just after the C comment, regardless if there is
				783	// actually a newline.
				784	formTokenWithChars(T, BufferPtr, tok::newline);
				785
				786	CommentState = LCS_BetweenComments;
				787	break;
				788	} else {
				789	// Don't synthesized a newline after BCPL comment.
				790	CommentState = LCS_BetweenComments;
				791	goto again;
				792	}
				793	}
				794	}
				795	}
				796
				797	StringRef Lexer::getSpelling(const Token &Tok,
				798	const SourceManager &SourceMgr,
				799	bool *Invalid) const {
				800	SourceLocation Loc = Tok.getLocation();
				801	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				802
				803	bool InvalidTemp = false;
				804	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				805	if (InvalidTemp) {
				806	*Invalid = true;
				807	return StringRef();
				808	}
				809
				810	const char *Begin = File.data() + LocInfo.second;
				811	return StringRef(Begin, Tok.getLength());
				812	}
				813
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	814	} // end namespace comments
				815	} // end namespace clang
				816