Blame - lib/AST/CommentLexer.cpp - fp2-dev/platform/external/clang

blob: da865d2ee365f5367f8aeccc364097c0960704f7 [file] [log] [blame]

Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	1	#include "clang/AST/CommentLexer.h"
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	2	#include "clang/AST/CommentCommandTraits.h"
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	3	#include "clang/Basic/CharInfo.h"
Dmitri Gribenko	c934dfe	2013-01-19 22:06:05 +0000	[diff] [blame]	4	#include "llvm/ADT/StringExtras.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	5	#include "llvm/ADT/StringSwitch.h"
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	6	#include "llvm/Support/ConvertUTF.h"
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	7	#include "llvm/Support/ErrorHandling.h"
				8
				9	namespace clang {
				10	namespace comments {
				11
				12	void Token::dump(const Lexer &L, const SourceManager &SM) const {
				13	llvm::errs() << "comments::Token Kind=" << Kind << " ";
				14	Loc.dump(SM);
				15	llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
				16	}
				17
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	18	static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	19	return isLetter(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	20	}
				21
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	22	static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	23	return isDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	24	}
				25
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	26	static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	27	return isHexDigit(C);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	28	}
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	29
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	30	static inline StringRef convertCodePointToUTF8(
				31	llvm::BumpPtrAllocator &Allocator,
				32	unsigned CodePoint) {
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	33	char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
				34	char *ResolvedPtr = Resolved;
Dmitri Gribenko	cb5620c	2013-01-30 12:06:08 +0000	[diff] [blame]	35	if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	36	return StringRef(Resolved, ResolvedPtr - Resolved);
				37	else
				38	return StringRef();
				39	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	40
Dmitri Gribenko	0ff4f8b	2013-02-10 11:54:22 +0000	[diff] [blame]	41	namespace {
				42
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	43	#include "clang/AST/CommentHTMLTags.inc"
				44	#include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
				45
				46	} // unnamed namespace
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	47
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	48	StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	49	// Fast path, first check a few most widely used named character references.
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	50	return llvm::StringSwitch<StringRef>(Name)
				51	.Case("amp", "&")
				52	.Case("lt", "<")
				53	.Case("gt", ">")
				54	.Case("quot", "\"")
				55	.Case("apos", "\'")
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	56	// Slow path.
				57	.Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
Fariborz Jahanian	658a115	2013-01-29 23:42:26 +0000	[diff] [blame]	58	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	59
				60	StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
				61	unsigned CodePoint = 0;
				62	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				63	assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
				64	CodePoint *= 10;
				65	CodePoint += Name[i] - '0';
				66	}
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	67	return convertCodePointToUTF8(Allocator, CodePoint);
				68	}
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	69
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	70	StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
				71	unsigned CodePoint = 0;
				72	for (unsigned i = 0, e = Name.size(); i != e; ++i) {
				73	CodePoint *= 16;
				74	const char C = Name[i];
				75	assert(isHTMLHexCharacterReferenceCharacter(C));
				76	CodePoint += llvm::hexDigitValue(C);
				77	}
				78	return convertCodePointToUTF8(Allocator, CodePoint);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	79	}
				80
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	81	void Lexer::skipLineStartingDecorations() {
				82	// This function should be called only for C comments
				83	assert(CommentState == LCS_InsideCComment);
				84
				85	if (BufferPtr == CommentEnd)
				86	return;
				87
				88	switch (*BufferPtr) {
				89	case ' ':
				90	case '\t':
				91	case '\f':
				92	case '\v': {
				93	const char *NewBufferPtr = BufferPtr;
				94	NewBufferPtr++;
				95	if (NewBufferPtr == CommentEnd)
				96	return;
				97
				98	char C = *NewBufferPtr;
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	99	while (isHorizontalWhitespace(C)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	100	NewBufferPtr++;
				101	if (NewBufferPtr == CommentEnd)
				102	return;
				103	C = *NewBufferPtr;
				104	}
				105	if (C == '*')
				106	BufferPtr = NewBufferPtr + 1;
				107	break;
				108	}
				109	case '*':
				110	BufferPtr++;
				111	break;
				112	}
				113	}
				114
				115	namespace {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	116	/// Returns pointer to the first newline character in the string.
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	117	const char findNewline(const char BufferPtr, const char *BufferEnd) {
				118	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	119	if (isVerticalWhitespace(*BufferPtr))
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	120	return BufferPtr;
				121	}
				122	return BufferEnd;
				123	}
				124
				125	const char skipNewline(const char BufferPtr, const char *BufferEnd) {
				126	if (BufferPtr == BufferEnd)
				127	return BufferPtr;
				128
				129	if (*BufferPtr == '\n')
				130	BufferPtr++;
				131	else {
				132	assert(*BufferPtr == '\r');
				133	BufferPtr++;
				134	if (BufferPtr != BufferEnd && *BufferPtr == '\n')
				135	BufferPtr++;
				136	}
				137	return BufferPtr;
				138	}
				139
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	140	const char skipNamedCharacterReference(const char BufferPtr,
				141	const char *BufferEnd) {
				142	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				143	if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
				144	return BufferPtr;
				145	}
				146	return BufferEnd;
				147	}
				148
				149	const char skipDecimalCharacterReference(const char BufferPtr,
				150	const char *BufferEnd) {
				151	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				152	if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
				153	return BufferPtr;
				154	}
				155	return BufferEnd;
				156	}
				157
				158	const char skipHexCharacterReference(const char BufferPtr,
				159	const char *BufferEnd) {
				160	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				161	if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
				162	return BufferPtr;
				163	}
				164	return BufferEnd;
				165	}
				166
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	167	bool isHTMLIdentifierStartingCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	168	return isLetter(C);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	169	}
				170
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	171	bool isHTMLIdentifierCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	172	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	173	}
				174
				175	const char skipHTMLIdentifier(const char BufferPtr, const char *BufferEnd) {
				176	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				177	if (!isHTMLIdentifierCharacter(*BufferPtr))
				178	return BufferPtr;
				179	}
				180	return BufferEnd;
				181	}
				182
				183	/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
				184	/// string allowed.
				185	///
				186	/// Returns pointer to closing quote.
				187	const char skipHTMLQuotedString(const char BufferPtr, const char *BufferEnd)
				188	{
				189	const char Quote = *BufferPtr;
				190	assert(Quote == '\"' \|\| Quote == '\'');
				191
				192	BufferPtr++;
				193	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				194	const char C = *BufferPtr;
				195	if (C == Quote && BufferPtr[-1] != '\\')
				196	return BufferPtr;
				197	}
				198	return BufferEnd;
				199	}
				200
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	201	const char skipWhitespace(const char BufferPtr, const char *BufferEnd) {
				202	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				203	if (!isWhitespace(*BufferPtr))
				204	return BufferPtr;
				205	}
				206	return BufferEnd;
				207	}
				208
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	209	bool isWhitespace(const char BufferPtr, const char BufferEnd) {
				210	return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
				211	}
				212
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	213	bool isCommandNameStartCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	214	return isLetter(C);
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	215	}
				216
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	217	bool isCommandNameCharacter(char C) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	218	return isAlphanumeric(C);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	219	}
				220
				221	const char skipCommandName(const char BufferPtr, const char *BufferEnd) {
				222	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				223	if (!isCommandNameCharacter(*BufferPtr))
				224	return BufferPtr;
				225	}
				226	return BufferEnd;
				227	}
				228
				229	/// Return the one past end pointer for BCPL comments.
				230	/// Handles newlines escaped with backslash or trigraph for backslahs.
				231	const char findBCPLCommentEnd(const char BufferPtr, const char *BufferEnd) {
				232	const char *CurPtr = BufferPtr;
				233	while (CurPtr != BufferEnd) {
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	234	while (!isVerticalWhitespace(*CurPtr)) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	235	CurPtr++;
				236	if (CurPtr == BufferEnd)
				237	return BufferEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	238	}
				239	// We found a newline, check if it is escaped.
				240	const char *EscapePtr = CurPtr - 1;
				241	while(isHorizontalWhitespace(*EscapePtr))
				242	EscapePtr--;
				243
				244	if (*EscapePtr == '\\' \|\|
				245	(EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
				246	EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
				247	// We found an escaped newline.
				248	CurPtr = skipNewline(CurPtr, BufferEnd);
				249	} else
				250	return CurPtr; // Not an escaped newline.
				251	}
				252	return BufferEnd;
				253	}
				254
				255	/// Return the one past end pointer for C comments.
				256	/// Very dumb, does not handle escaped newlines or trigraphs.
				257	const char findCCommentEnd(const char BufferPtr, const char *BufferEnd) {
				258	for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
				259	if (BufferPtr == '') {
				260	assert(BufferPtr + 1 != BufferEnd);
				261	if (*(BufferPtr + 1) == '/')
				262	return BufferPtr;
				263	}
				264	}
				265	llvm_unreachable("buffer end hit before '*/' was seen");
				266	}
				267	} // unnamed namespace
				268
				269	void Lexer::lexCommentText(Token &T) {
				270	assert(CommentState == LCS_InsideBCPLComment \|\|
				271	CommentState == LCS_InsideCComment);
				272
				273	switch (State) {
				274	case LS_Normal:
				275	break;
				276	case LS_VerbatimBlockFirstLine:
				277	lexVerbatimBlockFirstLine(T);
				278	return;
				279	case LS_VerbatimBlockBody:
				280	lexVerbatimBlockBody(T);
				281	return;
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	282	case LS_VerbatimLineText:
				283	lexVerbatimLineText(T);
				284	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	285	case LS_HTMLStartTag:
				286	lexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	287	return;
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	288	case LS_HTMLEndTag:
				289	lexHTMLEndTag(T);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	290	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	291	}
				292
				293	assert(State == LS_Normal);
				294
				295	const char *TokenPtr = BufferPtr;
				296	assert(TokenPtr < CommentEnd);
				297	while (TokenPtr != CommentEnd) {
				298	switch(*TokenPtr) {
				299	case '\\':
				300	case '@': {
Fariborz Jahanian	c98e913	2013-03-01 22:51:30 +0000	[diff] [blame]	301	T.HDCommand = (*TokenPtr == '@');
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	302	TokenPtr++;
				303	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	304	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	305	return;
				306	}
				307	char C = *TokenPtr;
				308	switch (C) {
				309	default:
				310	break;
				311
				312	case '\\': case '@': case '&': case '$':
				313	case '#': case '<': case '>': case '%':
				314	case '\"': case '.': case ':':
				315	// This is one of \\ \@ \& \$ etc escape sequences.
				316	TokenPtr++;
				317	if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
				318	// This is the \:: escape sequence.
				319	TokenPtr++;
				320	}
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	321	StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	322	formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	323	T.setText(UnescapedText);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	324	return;
				325	}
				326
				327	// Don't make zero-length commands.
Dmitri Gribenko	8c05da3	2012-09-14 16:35:35 +0000	[diff] [blame]	328	if (!isCommandNameStartCharacter(*TokenPtr)) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	329	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	330	return;
				331	}
				332
				333	TokenPtr = skipCommandName(TokenPtr, CommentEnd);
				334	unsigned Length = TokenPtr - (BufferPtr + 1);
				335
				336	// Hardcoded support for lexing LaTeX formula commands
				337	// \f$ \f[ \f] \f{ \f} as a single command.
				338	if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
				339	C = *TokenPtr;
				340	if (C == '$' \|\| C == '[' \|\| C == ']' \|\| C == '{' \|\| C == '}') {
				341	TokenPtr++;
				342	Length++;
				343	}
				344	}
				345
				346	const StringRef CommandName(BufferPtr + 1, Length);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	347
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	348	const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
				349	if (!Info) {
				350	formTokenWithChars(T, TokenPtr, tok::unknown_command);
				351	T.setUnknownCommandName(CommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	352	return;
				353	}
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	354	if (Info->IsVerbatimBlockCommand) {
				355	setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
				356	return;
				357	}
				358	if (Info->IsVerbatimLineCommand) {
				359	setupAndLexVerbatimLine(T, TokenPtr, Info);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	360	return;
				361	}
				362	formTokenWithChars(T, TokenPtr, tok::command);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	363	T.setCommandID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	364	return;
				365	}
				366
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	367	case '&':
				368	lexHTMLCharacterReference(T);
				369	return;
				370
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	371	case '<': {
				372	TokenPtr++;
				373	if (TokenPtr == CommentEnd) {
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	374	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	375	return;
				376	}
				377	const char C = *TokenPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	378	if (isHTMLIdentifierStartingCharacter(C))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	379	setupAndLexHTMLStartTag(T);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	380	else if (C == '/')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	381	setupAndLexHTMLEndTag(T);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	382	else
				383	formTextToken(T, TokenPtr);
				384
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	385	return;
				386	}
				387
				388	case '\n':
				389	case '\r':
				390	TokenPtr = skipNewline(TokenPtr, CommentEnd);
				391	formTokenWithChars(T, TokenPtr, tok::newline);
				392
				393	if (CommentState == LCS_InsideCComment)
				394	skipLineStartingDecorations();
				395	return;
				396
				397	default: {
Dmitri Gribenko	aa7dbaf	2012-12-30 19:45:46 +0000	[diff] [blame]	398	size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr).
				399	find_first_of("\n\r\\@&<");
				400	if (End != StringRef::npos)
				401	TokenPtr += End;
				402	else
				403	TokenPtr = CommentEnd;
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	404	formTextToken(T, TokenPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	405	return;
				406	}
				407	}
				408	}
				409	}
				410
				411	void Lexer::setupAndLexVerbatimBlock(Token &T,
				412	const char *TextBegin,
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	413	char Marker, const CommandInfo *Info) {
				414	assert(Info->IsVerbatimBlockCommand);
				415
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	416	VerbatimBlockEndCommandName.clear();
				417	VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	418	VerbatimBlockEndCommandName.append(Info->EndCommandName);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	419
				420	formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	421	T.setVerbatimBlockID(Info->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	422
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	423	// If there is a newline following the verbatim opening command, skip the
				424	// newline so that we don't create an tok::verbatim_block_line with empty
				425	// text content.
Dmitri Gribenko	bf88144	2013-02-09 15:16:58 +0000	[diff] [blame]	426	if (BufferPtr != CommentEnd &&
				427	isVerticalWhitespace(*BufferPtr)) {
				428	BufferPtr = skipNewline(BufferPtr, CommentEnd);
				429	State = LS_VerbatimBlockBody;
				430	return;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	431	}
				432
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	433	State = LS_VerbatimBlockFirstLine;
				434	}
				435
				436	void Lexer::lexVerbatimBlockFirstLine(Token &T) {
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	437	again:
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	438	assert(BufferPtr < CommentEnd);
				439
				440	// FIXME: It would be better to scan the text once, finding either the block
				441	// end command or newline.
				442	//
				443	// Extract current line.
				444	const char *Newline = findNewline(BufferPtr, CommentEnd);
				445	StringRef Line(BufferPtr, Newline - BufferPtr);
				446
				447	// Look for end command in current line.
				448	size_t Pos = Line.find(VerbatimBlockEndCommandName);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	449	const char *TextEnd;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	450	const char *NextLine;
				451	if (Pos == StringRef::npos) {
				452	// Current line is completely verbatim.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	453	TextEnd = Newline;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	454	NextLine = skipNewline(Newline, CommentEnd);
				455	} else if (Pos == 0) {
				456	// Current line contains just an end command.
				457	const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	458	StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	459	formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	460	T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	461	State = LS_Normal;
				462	return;
				463	} else {
				464	// There is some text, followed by end command. Extract text first.
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	465	TextEnd = BufferPtr + Pos;
				466	NextLine = TextEnd;
Dmitri Gribenko	64da4e5	2012-07-18 23:01:58 +0000	[diff] [blame]	467	// If there is only whitespace before end command, skip whitespace.
				468	if (isWhitespace(BufferPtr, TextEnd)) {
				469	BufferPtr = TextEnd;
				470	goto again;
				471	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	472	}
				473
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	474	StringRef Text(BufferPtr, TextEnd - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	475	formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	476	T.setVerbatimBlockText(Text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	477
				478	State = LS_VerbatimBlockBody;
				479	}
				480
				481	void Lexer::lexVerbatimBlockBody(Token &T) {
				482	assert(State == LS_VerbatimBlockBody);
				483
				484	if (CommentState == LCS_InsideCComment)
				485	skipLineStartingDecorations();
				486
				487	lexVerbatimBlockFirstLine(T);
				488	}
				489
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	490	void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
				491	const CommandInfo *Info) {
				492	assert(Info->IsVerbatimLineCommand);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	493	formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
Dmitri Gribenko	e4330a3	2012-09-10 20:32:42 +0000	[diff] [blame]	494	T.setVerbatimLineID(Info->getID());
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	495
				496	State = LS_VerbatimLineText;
				497	}
				498
				499	void Lexer::lexVerbatimLineText(Token &T) {
				500	assert(State == LS_VerbatimLineText);
				501
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	502	// Extract current line.
				503	const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	504	const StringRef Text(BufferPtr, Newline - BufferPtr);
				505	formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	506	T.setVerbatimLineText(Text);
Dmitri Gribenko	962668d	2012-06-27 16:53:58 +0000	[diff] [blame]	507
				508	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	509	}
				510
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	511	void Lexer::lexHTMLCharacterReference(Token &T) {
				512	const char *TokenPtr = BufferPtr;
				513	assert(*TokenPtr == '&');
				514	TokenPtr++;
				515	if (TokenPtr == CommentEnd) {
				516	formTextToken(T, TokenPtr);
				517	return;
				518	}
				519	const char *NamePtr;
				520	bool isNamed = false;
				521	bool isDecimal = false;
				522	char C = *TokenPtr;
				523	if (isHTMLNamedCharacterReferenceCharacter(C)) {
				524	NamePtr = TokenPtr;
				525	TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
				526	isNamed = true;
				527	} else if (C == '#') {
				528	TokenPtr++;
				529	if (TokenPtr == CommentEnd) {
				530	formTextToken(T, TokenPtr);
				531	return;
				532	}
				533	C = *TokenPtr;
				534	if (isHTMLDecimalCharacterReferenceCharacter(C)) {
				535	NamePtr = TokenPtr;
				536	TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
				537	isDecimal = true;
				538	} else if (C == 'x' \|\| C == 'X') {
				539	TokenPtr++;
				540	NamePtr = TokenPtr;
				541	TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
				542	} else {
				543	formTextToken(T, TokenPtr);
				544	return;
				545	}
				546	} else {
				547	formTextToken(T, TokenPtr);
				548	return;
				549	}
				550	if (NamePtr == TokenPtr \|\| TokenPtr == CommentEnd \|\|
				551	*TokenPtr != ';') {
				552	formTextToken(T, TokenPtr);
				553	return;
				554	}
				555	StringRef Name(NamePtr, TokenPtr - NamePtr);
				556	TokenPtr++; // Skip semicolon.
				557	StringRef Resolved;
Dmitri Gribenko	5bd1e5b	2013-01-30 14:29:28 +0000	[diff] [blame]	558	if (isNamed)
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	559	Resolved = resolveHTMLNamedCharacterReference(Name);
				560	else if (isDecimal)
				561	Resolved = resolveHTMLDecimalCharacterReference(Name);
				562	else
				563	Resolved = resolveHTMLHexCharacterReference(Name);
				564
				565	if (Resolved.empty()) {
				566	formTextToken(T, TokenPtr);
				567	return;
				568	}
				569	formTokenWithChars(T, TokenPtr, tok::text);
				570	T.setText(Resolved);
				571	return;
				572	}
				573
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	574	void Lexer::setupAndLexHTMLStartTag(Token &T) {
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	575	assert(BufferPtr[0] == '<' &&
				576	isHTMLIdentifierStartingCharacter(BufferPtr[1]));
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	577	const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	578	StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	579	if (!isHTMLTagName(Name)) {
				580	formTextToken(T, TagNameEnd);
				581	return;
				582	}
				583
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	584	formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
				585	T.setHTMLTagStartName(Name);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	586
				587	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				588
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	589	const char C = *BufferPtr;
				590	if (BufferPtr != CommentEnd &&
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	591	(C == '>' \|\| C == '/' \|\| isHTMLIdentifierStartingCharacter(C)))
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	592	State = LS_HTMLStartTag;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	593	}
				594
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	595	void Lexer::lexHTMLStartTag(Token &T) {
				596	assert(State == LS_HTMLStartTag);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	597
				598	const char *TokenPtr = BufferPtr;
				599	char C = *TokenPtr;
				600	if (isHTMLIdentifierCharacter(C)) {
				601	TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	602	StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	603	formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenko	f5e0aea	2012-06-27 16:30:35 +0000	[diff] [blame]	604	T.setHTMLIdent(Ident);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	605	} else {
				606	switch (C) {
				607	case '=':
				608	TokenPtr++;
				609	formTokenWithChars(T, TokenPtr, tok::html_equals);
				610	break;
				611	case '\"':
				612	case '\'': {
				613	const char *OpenQuote = TokenPtr;
				614	TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
				615	const char *ClosingQuote = TokenPtr;
				616	if (TokenPtr != CommentEnd) // Skip closing quote.
				617	TokenPtr++;
				618	formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
				619	T.setHTMLQuotedString(StringRef(OpenQuote + 1,
				620	ClosingQuote - (OpenQuote + 1)));
				621	break;
				622	}
				623	case '>':
				624	TokenPtr++;
				625	formTokenWithChars(T, TokenPtr, tok::html_greater);
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	626	State = LS_Normal;
				627	return;
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	628	case '/':
				629	TokenPtr++;
				630	if (TokenPtr != CommentEnd && *TokenPtr == '>') {
				631	TokenPtr++;
				632	formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
Dmitri Gribenko	477a9f5	2012-07-27 20:37:06 +0000	[diff] [blame]	633	} else
				634	formTextToken(T, TokenPtr);
				635
Dmitri Gribenko	a5ef44f	2012-07-11 21:38:39 +0000	[diff] [blame]	636	State = LS_Normal;
				637	return;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	638	}
				639	}
				640
				641	// Now look ahead and return to normal state if we don't see any HTML tokens
				642	// ahead.
				643	BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
				644	if (BufferPtr == CommentEnd) {
				645	State = LS_Normal;
				646	return;
				647	}
				648
				649	C = *BufferPtr;
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	650	if (!isHTMLIdentifierStartingCharacter(C) &&
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	651	C != '=' && C != '\"' && C != '\'' && C != '>') {
				652	State = LS_Normal;
				653	return;
				654	}
				655	}
				656
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	657	void Lexer::setupAndLexHTMLEndTag(Token &T) {
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	658	assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
				659
				660	const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
				661	const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	662	StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
				663	if (!isHTMLTagName(Name)) {
				664	formTextToken(T, TagNameEnd);
				665	return;
				666	}
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	667
				668	const char *End = skipWhitespace(TagNameEnd, CommentEnd);
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	669
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	670	formTokenWithChars(T, End, tok::html_end_tag);
Dmitri Gribenko	834a5bd	2012-08-22 22:56:08 +0000	[diff] [blame]	671	T.setHTMLTagEndName(Name);
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	672
				673	if (BufferPtr != CommentEnd && *BufferPtr == '>')
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	674	State = LS_HTMLEndTag;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	675	}
				676
Dmitri Gribenko	3f38bf2	2012-07-13 00:44:24 +0000	[diff] [blame]	677	void Lexer::lexHTMLEndTag(Token &T) {
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	678	assert(BufferPtr != CommentEnd && *BufferPtr == '>');
				679
				680	formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
				681	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	682	}
				683
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	684	Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits,
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	685	SourceLocation FileLoc,
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	686	const char BufferStart, const char BufferEnd):
Dmitri Gribenko	aa58081	2012-08-09 00:03:17 +0000	[diff] [blame]	687	Allocator(Allocator), Traits(Traits),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	688	BufferStart(BufferStart), BufferEnd(BufferEnd),
Dmitri Gribenko	af503a6	2012-08-31 10:35:30 +0000	[diff] [blame]	689	FileLoc(FileLoc), BufferPtr(BufferStart),
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	690	CommentState(LCS_BeforeComment), State(LS_Normal) {
				691	}
				692
				693	void Lexer::lex(Token &T) {
				694	again:
				695	switch (CommentState) {
				696	case LCS_BeforeComment:
				697	if (BufferPtr == BufferEnd) {
				698	formTokenWithChars(T, BufferPtr, tok::eof);
				699	return;
				700	}
				701
				702	assert(*BufferPtr == '/');
				703	BufferPtr++; // Skip first slash.
				704	switch(*BufferPtr) {
				705	case '/': { // BCPL comment.
				706	BufferPtr++; // Skip second slash.
				707
				708	if (BufferPtr != BufferEnd) {
				709	// Skip Doxygen magic marker, if it is present.
				710	// It might be missing because of a typo //< or /*<, or because we
				711	// merged this non-Doxygen comment into a bunch of Doxygen comments
				712	// around it: /** ... / / ... / /* ... */
				713	const char C = *BufferPtr;
				714	if (C == '/' \|\| C == '!')
				715	BufferPtr++;
				716	}
				717
				718	// Skip less-than symbol that marks trailing comments.
				719	// Skip it even if the comment is not a Doxygen one, because //< and /*<
				720	// are frequent typos.
				721	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				722	BufferPtr++;
				723
				724	CommentState = LCS_InsideBCPLComment;
Dmitri Gribenko	8d3ba23	2012-07-06 00:28:32 +0000	[diff] [blame]	725	if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
				726	State = LS_Normal;
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	727	CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
				728	goto again;
				729	}
				730	case '*': { // C comment.
				731	BufferPtr++; // Skip star.
				732
				733	// Skip Doxygen magic marker.
				734	const char C = *BufferPtr;
				735	if ((C == '' && (BufferPtr + 1) != '/') \|\| C == '!')
				736	BufferPtr++;
				737
				738	// Skip less-than symbol that marks trailing comments.
				739	if (BufferPtr != BufferEnd && *BufferPtr == '<')
				740	BufferPtr++;
				741
				742	CommentState = LCS_InsideCComment;
				743	State = LS_Normal;
				744	CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
				745	goto again;
				746	}
				747	default:
				748	llvm_unreachable("second character of comment should be '/' or '*'");
				749	}
				750
				751	case LCS_BetweenComments: {
				752	// Consecutive comments are extracted only if there is only whitespace
				753	// between them. So we can search for the start of the next comment.
				754	const char *EndWhitespace = BufferPtr;
				755	while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
				756	EndWhitespace++;
				757
				758	// Turn any whitespace between comments (and there is only whitespace
Dmitri Gribenko	a99ec10	2012-07-09 21:32:40 +0000	[diff] [blame]	759	// between them -- guaranteed by comment extraction) into a newline. We
				760	// have two newlines between C comments in total (first one was synthesized
				761	// after a comment).
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	762	formTokenWithChars(T, EndWhitespace, tok::newline);
				763
				764	CommentState = LCS_BeforeComment;
				765	break;
				766	}
				767
				768	case LCS_InsideBCPLComment:
				769	case LCS_InsideCComment:
				770	if (BufferPtr != CommentEnd) {
				771	lexCommentText(T);
				772	break;
				773	} else {
				774	// Skip C comment closing sequence.
				775	if (CommentState == LCS_InsideCComment) {
				776	assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
				777	BufferPtr += 2;
				778	assert(BufferPtr <= BufferEnd);
				779
				780	// Synthenize newline just after the C comment, regardless if there is
				781	// actually a newline.
				782	formTokenWithChars(T, BufferPtr, tok::newline);
				783
				784	CommentState = LCS_BetweenComments;
				785	break;
				786	} else {
				787	// Don't synthesized a newline after BCPL comment.
				788	CommentState = LCS_BetweenComments;
				789	goto again;
				790	}
				791	}
				792	}
				793	}
				794
				795	StringRef Lexer::getSpelling(const Token &Tok,
				796	const SourceManager &SourceMgr,
				797	bool *Invalid) const {
				798	SourceLocation Loc = Tok.getLocation();
				799	std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
				800
				801	bool InvalidTemp = false;
				802	StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
				803	if (InvalidTemp) {
				804	*Invalid = true;
				805	return StringRef();
				806	}
				807
				808	const char *Begin = File.data() + LocInfo.second;
				809	return StringRef(Begin, Tok.getLength());
				810	}
				811
Dmitri Gribenko	2d44d77	2012-06-26 20:39:18 +0000	[diff] [blame]	812	} // end namespace comments
				813	} // end namespace clang
				814