Blame - clang/lib/Format/FormatTokenLexer.h - toolchain/llvm-project

blob: 59dc2a752f1f3dcdad72942d1eb84334ca34c435 [file] [log] [blame]

Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	1	//===--- FormatTokenLexer.h - Format C++ code -------------------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// \file
				11	/// \brief This file contains FormatTokenLexer, which tokenizes a source file
				12	/// into a token stream suitable for ClangFormat.
				13	///
				14	//===----------------------------------------------------------------------===//
				15
				16	#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
				17	#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
				18
				19	#include "Encoding.h"
				20	#include "FormatToken.h"
				21	#include "clang/Basic/SourceLocation.h"
				22	#include "clang/Basic/SourceManager.h"
				23	#include "clang/Format/Format.h"
				24	#include "llvm/Support/Regex.h"
				25
Martin Probst	6181da4	2016-08-25 10:13:21 +0000	[diff] [blame]	26	#include <stack>
				27
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	28	namespace clang {
				29	namespace format {
				30
Martin Probst	6181da4	2016-08-25 10:13:21 +0000	[diff] [blame]	31	enum LexerState { // Element type of FormatTokenLexer::StateStack (a stack of lexing contexts).
				32	NORMAL,
				33	TEMPLATE_STRING, // NOTE(review): presumably "inside a JavaScript template string" -- confirm in the .cpp.
				34	TOKEN_STASHED, // NOTE(review): presumably pairs with FormatTokenLexer::getStashedToken -- confirm.
				35	};
				36
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	37	class FormatTokenLexer { // Tokenizes a source file into a token stream suitable for ClangFormat.
				38	public:
Krasimir Georgiev	9ad83fe	2017-10-30 14:01:50 +0000	[diff] [blame]	39	FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	40	const FormatStyle &Style, encoding::Encoding Encoding); // NOTE(review): SourceMgr and Style are stored as reference members below, so the arguments presumably must outlive this object -- confirm.
				41
				42	ArrayRef<FormatToken *> lex(); // NOTE(review): presumably lexes the whole file and returns a view of 'Tokens' -- confirm in the .cpp.
				43
				44	const AdditionalKeywords &getKeywords() { return Keywords; } // Accessor for the 'Keywords' member.
				45
				46	private:
				47	void tryMergePreviousTokens(); // NOTE(review): presumably dispatches to the tryMerge* helpers below -- confirm.
				48
				49	bool tryMergeLessLess();
Alexander Kornienko	d4fa2e6	2017-04-11 09:55:00 +0000	[diff] [blame]	50	bool tryMergeNSStringLiteral();
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	51
				52	bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); // NOTE(review): the bool results of the tryMerge* functions presumably mean "a merge happened" -- confirm.
				53
				54	// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
				55	bool precedesOperand(FormatToken *Tok);
				56
				57	bool canPrecedeRegexLiteral(FormatToken *Prev); // NOTE(review): presumably the location check used by tryParseJSRegexLiteral -- confirm.
				58
				59	// Tries to parse a JavaScript Regex literal starting at the current token,
				60	// if that begins with a slash and is in a location where JavaScript allows
				61	// regex literals. Changes the current token to a regex literal and updates
				62	// its text if successful.
				63	void tryParseJSRegexLiteral();
				64
Martin Probst	6181da4	2016-08-25 10:13:21 +0000	[diff] [blame]	65	// Handles JavaScript template strings.
				66	//
				67	// JavaScript template strings use backticks ('`') as delimiters, and allow
				68	// embedding expressions nested in ${expr-here}. Template strings can be
				69	// nested recursively, i.e. expressions can contain template strings in turn.
				70	//
				71	// The code below parses starting from a backtick, up to a closing backtick or
				72	// an opening ${. It also maintains a stack of lexing contexts to handle
				73	// nested template parts by balancing curly braces.
				74	void handleTemplateStrings();
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	75
Krasimir Georgiev	410ed24	2017-11-10 12:50:09 +0000	[diff] [blame]	76	void tryParsePythonComment(); // NOTE(review): undocumented here; see the definition for which inputs it applies to.
				77
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	78	bool tryMerge_TMacro();
				79
				80	bool tryMergeConflictMarkers();
				81
				82	FormatToken *getStashedToken(); // NOTE(review): presumably used together with the TOKEN_STASHED lexer state -- confirm.
				83
				84	FormatToken *getNextToken();
				85
				86	FormatToken *FormatTok;
				87	bool IsFirstToken;
Martin Probst	6181da4	2016-08-25 10:13:21 +0000	[diff] [blame]	88	std::stack<LexerState> StateStack; // Stack of lexing contexts (see the handleTemplateStrings comment).
Martin Probst	c4a0dd4	2016-05-20 11:24:24 +0000	[diff] [blame]	89	unsigned Column;
				90	unsigned TrailingWhitespace;
				91	std::unique_ptr<Lexer> Lex; // Underlying Lexer, owned by this object.
				92	const SourceManager &SourceMgr; // Reference member: not owned.
				93	FileID ID;
				94	const FormatStyle &Style; // Reference member: not owned.
				95	IdentifierTable IdentTable;
				96	AdditionalKeywords Keywords;
				97	encoding::Encoding Encoding;
				98	llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; // NOTE(review): presumably backs the FormatToken objects that 'Tokens' points to -- confirm.
				99	// Index (in 'Tokens') of the last token that starts a new line.
				100	unsigned FirstInLineIndex;
				101	SmallVector<FormatToken *, 16> Tokens;
				102	SmallVector<IdentifierInfo *, 8> ForEachMacros;
				103
				104	bool FormattingDisabled;
				105
				106	llvm::Regex MacroBlockBeginRegex; // NOTE(review): presumably built from the style's macro-block-begin setting -- confirm.
				107	llvm::Regex MacroBlockEndRegex; // NOTE(review): presumably built from the style's macro-block-end setting -- confirm.
				108
				109	void readRawToken(FormatToken &Tok);
				110
				111	void resetLexer(unsigned Offset);
				112	};
				113
				114	} // namespace format
				115	} // namespace clang
				116
				117	#endif