Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: cee086e1032809bf2afb25ae66508fe273cb58fd [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	2	#include "clang/AST/CommentCommandTraits.h"
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	3	#include "clang/Basic/CharInfo.h"
Dmitri Gribenko	c934dfe	2013-01-19 22:06:05 +0000	[diff] [blame]	4	#include "llvm/ADT/StringExtras.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	5	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	6	#include "llvm/Support/ConvertUTF.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	7	#include "llvm/Support/ErrorHandling.h"
				8
				9	namespace clang {
				10	namespace comments {
				11
				12	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				13	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				14	Loc.dump(SM);
				15	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				16	}
				17
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame^]	18	static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	19	return isLetter(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	20	}
				21
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame^]	22	static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	23	return isDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	24	}
				25
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame^]	26	static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	27	return isHexDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	28	}
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	29
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame^]	30	static inline StringRef convertCodePointToUTF8(
				31	llvm::BumpPtrAllocator &Allocator,
				32	unsigned CodePoint) {
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	33	char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
				34	char *ResolvedPtr = Resolved;
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	35	if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	36	return StringRef(Resolved, ResolvedPtr - Resolved);
				37	else
				38	return StringRef();
				39	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	40
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame^]	41	namespace {
				42
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	43	#include "clang/AST/CommentHTMLTags.inc"
				44	#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
				45
				46	} // unnamed namespace
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	47
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	48	StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	49	// Fast path, first check a few most widely used named character references.
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	50	return llvm::StringSwitch<StringRef>(Name)
				51	.Case("amp", "&")
				52	.Case("lt", "<")
				53	.Case("gt", ">")
				54	.Case("quot", "\"")
				55	.Case("apos", "\'")
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	56	// Slow path.
				57	.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	58	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	59
				60	StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
				61	unsigned CodePoint = 0;
				62	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				63	assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
				64	CodePoint *= 10;
				65	CodePoint += Name[i] - '0';
				66	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	67	return convertCodePointToUTF8(Allocator, CodePoint);
				68	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	69
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	70	StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
				71	unsigned CodePoint = 0;
				72	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				73	CodePoint *= 16;
				74	const char C = Name[i];
				75	assert(isHTMLHexCharacterReferenceCharacter(C));
				76	CodePoint += llvm::hexDigitValue(C);
				77	}
				78	return convertCodePointToUTF8(Allocator, CodePoint);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	79	}
				80
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	81	void Lexer::skipLineStartingDecorations() {
				82	// This function should be called only for C comments
				83	assert(CommentState == LCS_InsideCComment);
				84
				85	if (BufferPtr == CommentEnd)
				86	return;
				87
				88	switch (*BufferPtr) {
				89	case ' ':
				90	case '\t':
				91	case '\f':
				92	case '\v': {
				93	const char *NewBufferPtr = BufferPtr;
				94	NewBufferPtr++;
				95	if (NewBufferPtr == CommentEnd)
				96	return;
				97
				98	char C = *NewBufferPtr;
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	99	while (isHorizontalWhitespace(C)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	100	NewBufferPtr++;
				101	if (NewBufferPtr == CommentEnd)
				102	return;
				103	C = *NewBufferPtr;
				104	}
				105	if (C == '*')
				106	BufferPtr = NewBufferPtr + 1;
				107	break;
				108	}
				109	case '*':
				110	BufferPtr++;
				111	break;
				112	}
				113	}
				114
				115	namespace {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	116	/// Returns pointer to the first newline character in the string.
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	117	const char findNewline(const char BufferPtr, const char *BufferEnd) {
				118	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	119	if (isVerticalWhitespace(*BufferPtr))
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	120	return BufferPtr;
				121	}
				122	return BufferEnd;
				123	}
				124
				125	const char skipNewline(const char BufferPtr, const char *BufferEnd) {
				126	if (BufferPtr == BufferEnd)
				127	return BufferPtr;
				128
				129	if (*BufferPtr == '\n')
				130	BufferPtr++;
				131	else {
				132	assert(*BufferPtr == '\r');
				133	BufferPtr++;
				134	if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				135	BufferPtr++;
				136	}
				137	return BufferPtr;
				138	}
				139
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	140	const char skipNamedCharacterReference(const char BufferPtr,
				141	const char *BufferEnd) {
				142	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				143	if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
				144	return BufferPtr;
				145	}
				146	return BufferEnd;
				147	}
				148
				149	const char skipDecimalCharacterReference(const char BufferPtr,
				150	const char *BufferEnd) {
				151	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				152	if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
				153	return BufferPtr;
				154	}
				155	return BufferEnd;
				156	}
				157
				158	const char skipHexCharacterReference(const char BufferPtr,
				159	const char *BufferEnd) {
				160	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				161	if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
				162	return BufferPtr;
				163	}
				164	return BufferEnd;
				165	}
				166
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	167	bool isHTMLIdentifierStartingCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	168	return isLetter(C);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	169	}
				170
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	171	bool isHTMLIdentifierCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	172	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	173	}
				174
				175	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				176	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				177	if (!isHTMLIdentifierCharacter(*BufferPtr))
				178	return BufferPtr;
				179	}
				180	return BufferEnd;
				181	}
				182
				183	/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
				184	/// string allowed.
				185	///
				186	/// Returns pointer to closing quote.
				187	const char skipHTMLQuotedString(const char BufferPtr, const char *BufferEnd)
				188	{
				189	const char Quote = *BufferPtr;
				190	assert(Quote == '\"' \|\| Quote == '\'');
				191
				192	BufferPtr++;
				193	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				194	const char C = *BufferPtr;
				195	if (C == Quote && BufferPtr[-1] != '\\')
				196	return BufferPtr;
				197	}
				198	return BufferEnd;
				199	}
				200
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	201	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				202	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				203	if (!isWhitespace(*BufferPtr))
				204	return BufferPtr;
				205	}
				206	return BufferEnd;
				207	}
				208
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	209	bool isWhitespace(const char BufferPtr, const char BufferEnd) {
				210	return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
				211	}
				212
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	213	bool isCommandNameStartCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	214	return isLetter(C);
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	215	}
				216
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	217	bool isCommandNameCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	218	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	219	}
				220
				221	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				222	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				223	if (!isCommandNameCharacter(*BufferPtr))
				224	return BufferPtr;
				225	}
				226	return BufferEnd;
				227	}
				228
				229	/// Return the one past end pointer for BCPL comments.
				230	/// Handles newlines escaped with backslash or trigraph for backslahs.
				231	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				232	const char *CurPtr = BufferPtr;
				233	while (CurPtr != BufferEnd) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	234	while (!isVerticalWhitespace(*CurPtr)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	235	CurPtr++;
				236	if (CurPtr == BufferEnd)
				237	return BufferEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	238	}
				239	// We found a newline, check if it is escaped.
				240	const char *EscapePtr = CurPtr - 1;
				241	while(isHorizontalWhitespace(*EscapePtr))
				242	EscapePtr--;
				243
				244	if (*EscapePtr == '\\' \|\|
				245	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				246	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				247	// We found an escaped newline.
				248	CurPtr = skipNewline(CurPtr, BufferEnd);
				249	} else
				250	return CurPtr; // Not an escaped newline.
				251	}
				252	return BufferEnd;
				253	}
				254
				255	/// Return the one past end pointer for C comments.
				256	/// Very dumb, does not handle escaped newlines or trigraphs.
				257	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				258	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				259	if (BufferPtr == '') {
				260	assert(BufferPtr + 1 != BufferEnd);
				261	if (*(BufferPtr + 1) == '/')
				262	return BufferPtr;
				263	}
				264	}
				265	llvm_unreachable("buffer end hit before '*/' was seen");
				266	}
				267	} // unnamed namespace
				268
				269	void Lexer::lexCommentText(Token &T) {
				270	assert(CommentState == LCS_InsideBCPLComment \|\|
				271	CommentState == LCS_InsideCComment);
				272
				273	switch (State) {
				274	case LS_Normal:
				275	break;
				276	case LS_VerbatimBlockFirstLine:
				277	lexVerbatimBlockFirstLine(T);
				278	return;
				279	case LS_VerbatimBlockBody:
				280	lexVerbatimBlockBody(T);
				281	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	282	case LS_VerbatimLineText:
				283	lexVerbatimLineText(T);
				284	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	285	case LS_HTMLStartTag:
				286	lexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	287	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	288	case LS_HTMLEndTag:
				289	lexHTMLEndTag(T);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	290	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	291	}
				292
				293	assert(State == LS_Normal);
				294
				295	const char *TokenPtr = BufferPtr;
				296	assert(TokenPtr < CommentEnd);
				297	while (TokenPtr != CommentEnd) {
				298	switch(*TokenPtr) {
				299	case '\\':
				300	case '@': {
				301	TokenPtr++;
				302	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	303	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	304	return;
				305	}
				306	char C = *TokenPtr;
				307	switch (C) {
				308	default:
				309	break;
				310
				311	case '\\': case '@': case '&': case '$':
				312	case '#': case '<': case '>': case '%':
				313	case '\"': case '.': case ':':
				314	// This is one of \\ \@ \& \$ etc escape sequences.
				315	TokenPtr++;
				316	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				317	// This is the \:: escape sequence.
				318	TokenPtr++;
				319	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	320	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	321	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	322	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	323	return;
				324	}
				325
				326	// Don't make zero-length commands.
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	327	if (!isCommandNameStartCharacter(*TokenPtr)) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	328	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	329	return;
				330	}
				331
				332	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				333	unsigned Length = TokenPtr - (BufferPtr + 1);
				334
				335	// Hardcoded support for lexing LaTeX formula commands
				336	// \f$ \f[ \f] \f{ \f} as a single command.
				337	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				338	C = *TokenPtr;
				339	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				340	TokenPtr++;
				341	Length++;
				342	}
				343	}
				344
				345	const StringRef CommandName(BufferPtr + 1, Length);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	346
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	347	const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
				348	if (!Info) {
				349	formTokenWithChars(T, TokenPtr, tok::unknown_command);
				350	T.setUnknownCommandName(CommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	351	return;
				352	}
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	353	if (Info->IsVerbatimBlockCommand) {
				354	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
				355	return;
				356	}
				357	if (Info->IsVerbatimLineCommand) {
				358	setupAndLexVerbatimLine(T, TokenPtr, Info);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	359	return;
				360	}
				361	formTokenWithChars(T, TokenPtr, tok::command);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	362	T.setCommandID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	363	return;
				364	}
				365
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	366	case '&':
				367	lexHTMLCharacterReference(T);
				368	return;
				369
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	370	case '<': {
				371	TokenPtr++;
				372	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	373	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	374	return;
				375	}
				376	const char C = *TokenPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	377	if (isHTMLIdentifierStartingCharacter(C))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	378	setupAndLexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	379	else if (C == '/')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	380	setupAndLexHTMLEndTag(T);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	381	else
				382	formTextToken(T, TokenPtr);
				383
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	384	return;
				385	}
				386
				387	case '\n':
				388	case '\r':
				389	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				390	formTokenWithChars(T, TokenPtr, tok::newline);
				391
				392	if (CommentState == LCS_InsideCComment)
				393	skipLineStartingDecorations();
				394	return;
				395
				396	default: {
Dmitri Gribenko	aa7dbaf	2012-12-30 19:45:46 +0000	[diff] [blame]	397	size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
				398	find_first_of("\n\r\\@&<");
				399	if (End != StringRef::npos)
				400	TokenPtr += End;
				401	else
				402	TokenPtr = CommentEnd;
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	403	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	404	return;
				405	}
				406	}
				407	}
				408	}
				409
				410	void Lexer::setupAndLexVerbatimBlock(Token &T,
				411	const char *TextBegin,
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	412	char Marker, const CommandInfo *Info) {
				413	assert(Info->IsVerbatimBlockCommand);
				414
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	415	VerbatimBlockEndCommandName.clear();
				416	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	417	VerbatimBlockEndCommandName.append(Info->EndCommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	418
				419	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	420	T.setVerbatimBlockID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	421
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	422	// If there is a newline following the verbatim opening command, skip the
				423	// newline so that we don't create an tok::verbatim_block_line with empty
				424	// text content.
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	425	if (BufferPtr != CommentEnd &&
				426	isVerticalWhitespace(*BufferPtr)) {
				427	BufferPtr = skipNewline(BufferPtr, CommentEnd);
				428	State = LS_VerbatimBlockBody;
				429	return;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	430	}
				431
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	432	State = LS_VerbatimBlockFirstLine;
				433	}
				434
				435	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	436	again:
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	437	assert(BufferPtr < CommentEnd);
				438
				439	// FIXME: It would be better to scan the text once, finding either the block
				440	// end command or newline.
				441	//
				442	// Extract current line.
				443	const char *Newline = findNewline(BufferPtr, CommentEnd);
				444	StringRef Line(BufferPtr, Newline - BufferPtr);
				445
				446	// Look for end command in current line.
				447	size_t Pos = Line.find(VerbatimBlockEndCommandName);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	448	const char *TextEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	449	const char *NextLine;
				450	if (Pos == StringRef::npos) {
				451	// Current line is completely verbatim.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	452	TextEnd = Newline;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	453	NextLine = skipNewline(Newline, CommentEnd);
				454	} else if (Pos == 0) {
				455	// Current line contains just an end command.
				456	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	457	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	458	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	459	T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	460	State = LS_Normal;
				461	return;
				462	} else {
				463	// There is some text, followed by end command. Extract text first.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	464	TextEnd = BufferPtr + Pos;
				465	NextLine = TextEnd;
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	466	// If there is only whitespace before end command, skip whitespace.
				467	if (isWhitespace(BufferPtr, TextEnd)) {
				468	BufferPtr = TextEnd;
				469	goto again;
				470	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	471	}
				472
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	473	StringRef Text(BufferPtr, TextEnd - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	474	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	475	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	476
				477	State = LS_VerbatimBlockBody;
				478	}
				479
				480	void Lexer::lexVerbatimBlockBody(Token &T) {
				481	assert(State == LS_VerbatimBlockBody);
				482
				483	if (CommentState == LCS_InsideCComment)
				484	skipLineStartingDecorations();
				485
				486	lexVerbatimBlockFirstLine(T);
				487	}
				488
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	489	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
				490	const CommandInfo *Info) {
				491	assert(Info->IsVerbatimLineCommand);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	492	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	493	T.setVerbatimLineID(Info->getID());
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	494
				495	State = LS_VerbatimLineText;
				496	}
				497
				498	void Lexer::lexVerbatimLineText(Token &T) {
				499	assert(State == LS_VerbatimLineText);
				500
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	501	// Extract current line.
				502	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	503	const StringRef Text(BufferPtr, Newline - BufferPtr);
				504	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	505	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	506
				507	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	508	}
				509
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	510	void Lexer::lexHTMLCharacterReference(Token &T) {
				511	const char *TokenPtr = BufferPtr;
				512	assert(*TokenPtr == '&');
				513	TokenPtr++;
				514	if (TokenPtr == CommentEnd) {
				515	formTextToken(T, TokenPtr);
				516	return;
				517	}
				518	const char *NamePtr;
				519	bool isNamed = false;
				520	bool isDecimal = false;
				521	char C = *TokenPtr;
				522	if (isHTMLNamedCharacterReferenceCharacter(C)) {
				523	NamePtr = TokenPtr;
				524	TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
				525	isNamed = true;
				526	} else if (C == '#') {
				527	TokenPtr++;
				528	if (TokenPtr == CommentEnd) {
				529	formTextToken(T, TokenPtr);
				530	return;
				531	}
				532	C = *TokenPtr;
				533	if (isHTMLDecimalCharacterReferenceCharacter(C)) {
				534	NamePtr = TokenPtr;
				535	TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
				536	isDecimal = true;
				537	} else if (C == 'x' \|\| C == 'X') {
				538	TokenPtr++;
				539	NamePtr = TokenPtr;
				540	TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
				541	} else {
				542	formTextToken(T, TokenPtr);
				543	return;
				544	}
				545	} else {
				546	formTextToken(T, TokenPtr);
				547	return;
				548	}
				549	if (NamePtr == TokenPtr \|\| TokenPtr == CommentEnd \|\|
				550	*TokenPtr != ';') {
				551	formTextToken(T, TokenPtr);
				552	return;
				553	}
				554	StringRef Name(NamePtr, TokenPtr - NamePtr);
				555	TokenPtr++; // Skip semicolon.
				556	StringRef Resolved;
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	557	if (isNamed)
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	558	Resolved = resolveHTMLNamedCharacterReference(Name);
				559	else if (isDecimal)
				560	Resolved = resolveHTMLDecimalCharacterReference(Name);
				561	else
				562	Resolved = resolveHTMLHexCharacterReference(Name);
				563
				564	if (Resolved.empty()) {
				565	formTextToken(T, TokenPtr);
				566	return;
				567	}
				568	formTokenWithChars(T, TokenPtr, tok::text);
				569	T.setText(Resolved);
				570	return;
				571	}
				572
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	573	void Lexer::setupAndLexHTMLStartTag(Token &T) {
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	574	assert(BufferPtr[0] == '<' &&
				575	isHTMLIdentifierStartingCharacter(BufferPtr[1]));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	576	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	577	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	578	if (!isHTMLTagName(Name)) {
				579	formTextToken(T, TagNameEnd);
				580	return;
				581	}
				582
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	583	formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
				584	T.setHTMLTagStartName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	585
				586	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				587
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	588	const char C = *BufferPtr;
				589	if (BufferPtr != CommentEnd &&
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	590	(C == '>' \|\| C == '/' \|\| isHTMLIdentifierStartingCharacter(C)))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	591	State = LS_HTMLStartTag;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	592	}
				593
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	594	void Lexer::lexHTMLStartTag(Token &T) {
				595	assert(State == LS_HTMLStartTag);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	596
				597	const char *TokenPtr = BufferPtr;
				598	char C = *TokenPtr;
				599	if (isHTMLIdentifierCharacter(C)) {
				600	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	601	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	602	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	603	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	604	} else {
				605	switch (C) {
				606	case '=':
				607	TokenPtr++;
				608	formTokenWithChars(T, TokenPtr, tok::html_equals);
				609	break;
				610	case '\"':
				611	case '\'': {
				612	const char *OpenQuote = TokenPtr;
				613	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				614	const char *ClosingQuote = TokenPtr;
				615	if (TokenPtr != CommentEnd) // Skip closing quote.
				616	TokenPtr++;
				617	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				618	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				619	ClosingQuote - (OpenQuote + 1)));
				620	break;
				621	}
				622	case '>':
				623	TokenPtr++;
				624	formTokenWithChars(T, TokenPtr, tok::html_greater);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	625	State = LS_Normal;
				626	return;
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	627	case '/':
				628	TokenPtr++;
				629	if (TokenPtr != CommentEnd && *TokenPtr == '>') {
				630	TokenPtr++;
				631	formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	632	} else
				633	formTextToken(T, TokenPtr);
				634
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	635	State = LS_Normal;
				636	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	637	}
				638	}
				639
				640	// Now look ahead and return to normal state if we don't see any HTML tokens
				641	// ahead.
				642	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				643	if (BufferPtr == CommentEnd) {
				644	State = LS_Normal;
				645	return;
				646	}
				647
				648	C = *BufferPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	649	if (!isHTMLIdentifierStartingCharacter(C) &&
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	650	C != '=' && C != '\"' && C != '\'' && C != '>') {
				651	State = LS_Normal;
				652	return;
				653	}
				654	}
				655
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	656	void Lexer::setupAndLexHTMLEndTag(Token &T) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	657	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				658
				659	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				660	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	661	StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
				662	if (!isHTMLTagName(Name)) {
				663	formTextToken(T, TagNameEnd);
				664	return;
				665	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	666
				667	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	668
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	669	formTokenWithChars(T, End, tok::html_end_tag);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	670	T.setHTMLTagEndName(Name);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	671
				672	if (BufferPtr != CommentEnd && *BufferPtr == '>')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	673	State = LS_HTMLEndTag;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	674	}
				675
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	676	void Lexer::lexHTMLEndTag(Token &T) {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	677	assert(BufferPtr != CommentEnd && *BufferPtr == '>');
				678
				679	formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
				680	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	681	}
				682
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	683	Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	684	SourceLocation FileLoc,
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	685	const char BufferStart, const char BufferEnd):
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	686	Allocator(Allocator), Traits(Traits),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	687	BufferStart(BufferStart), BufferEnd(BufferEnd),
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	688	FileLoc(FileLoc), BufferPtr(BufferStart),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	689	CommentState(LCS_BeforeComment), State(LS_Normal) {
				690	}
				691
				692	void Lexer::lex(Token &T) {
				693	again:
				694	switch (CommentState) {
				695	case LCS_BeforeComment:
				696	if (BufferPtr == BufferEnd) {
				697	formTokenWithChars(T, BufferPtr, tok::eof);
				698	return;
				699	}
				700
				701	assert(*BufferPtr == '/');
				702	BufferPtr++; // Skip first slash.
				703	switch(*BufferPtr) {
				704	case '/': { // BCPL comment.
				705	BufferPtr++; // Skip second slash.
				706
				707	if (BufferPtr != BufferEnd) {
				708	// Skip Doxygen magic marker, if it is present.
				709	// It might be missing because of a typo //< or /*<, or because we
				710	// merged this non-Doxygen comment into a bunch of Doxygen comments
				711	// around it: /** ... / / ... / /* ... */
				712	const char C = *BufferPtr;
				713	if (C == '/' \|\| C == '!')
				714	BufferPtr++;
				715	}
				716
				717	// Skip less-than symbol that marks trailing comments.
				718	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				719	// are frequent typos.
				720	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				721	BufferPtr++;
				722
				723	CommentState = LCS_InsideBCPLComment;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	724	if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
				725	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	726	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				727	goto again;
				728	}
				729	case '*': { // C comment.
				730	BufferPtr++; // Skip star.
				731
				732	// Skip Doxygen magic marker.
				733	const char C = *BufferPtr;
				734	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				735	BufferPtr++;
				736
				737	// Skip less-than symbol that marks trailing comments.
				738	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				739	BufferPtr++;
				740
				741	CommentState = LCS_InsideCComment;
				742	State = LS_Normal;
				743	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				744	goto again;
				745	}
				746	default:
				747	llvm_unreachable("second character of comment should be '/' or '*'");
				748	}
				749
				750	case LCS_BetweenComments: {
				751	// Consecutive comments are extracted only if there is only whitespace
				752	// between them. So we can search for the start of the next comment.
				753	const char *EndWhitespace = BufferPtr;
				754	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				755	EndWhitespace++;
				756
				757	// Turn any whitespace between comments (and there is only whitespace
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	758	// between them -- guaranteed by comment extraction) into a newline. We
				759	// have two newlines between C comments in total (first one was synthesized
				760	// after a comment).
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	761	formTokenWithChars(T, EndWhitespace, tok::newline);
				762
				763	CommentState = LCS_BeforeComment;
				764	break;
				765	}
				766
				767	case LCS_InsideBCPLComment:
				768	case LCS_InsideCComment:
				769	if (BufferPtr != CommentEnd) {
				770	lexCommentText(T);
				771	break;
				772	} else {
				773	// Skip C comment closing sequence.
				774	if (CommentState == LCS_InsideCComment) {
				775	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				776	BufferPtr += 2;
				777	assert(BufferPtr <= BufferEnd);
				778
				779	// Synthenize newline just after the C comment, regardless if there is
				780	// actually a newline.
				781	formTokenWithChars(T, BufferPtr, tok::newline);
				782
				783	CommentState = LCS_BetweenComments;
				784	break;
				785	} else {
				786	// Don't synthesized a newline after BCPL comment.
				787	CommentState = LCS_BetweenComments;
				788	goto again;
				789	}
				790	}
				791	}
				792	}
				793
				794	StringRef Lexer::getSpelling(const Token &Tok,
				795	const SourceManager &SourceMgr,
				796	bool *Invalid) const {
				797	SourceLocation Loc = Tok.getLocation();
				798	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				799
				800	bool InvalidTemp = false;
				801	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				802	if (InvalidTemp) {
				803	*Invalid = true;
				804	return StringRef();
				805	}
				806
				807	const char *Begin = File.data() + LocInfo.second;
				808	return StringRef(Begin, Tok.getLength());
				809	}
				810
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	811	} // end namespace comments
				812	} // end namespace clang
				813