Blame - lib/Format/BreakableToken.cpp - fp2-dev/platform/external/clang

blob: 5c3ad9cee2bc3aed1e1389f07d02803cf34759c0 [file] [log] [blame]

Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	1	//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// \file
				11	/// \brief Contains implementation of BreakableToken class and classes derived
				12	/// from it.
				13	///
				14	//===----------------------------------------------------------------------===//
				15
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	16	#define DEBUG_TYPE "format-token-breaker"
				17
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	18	#include "BreakableToken.h"
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	19	#include "clang/Format/Format.h"
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	20	#include "llvm/ADT/STLExtras.h"
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	21	#include "llvm/Support/Debug.h"
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	22	#include <algorithm>
				23
				24	namespace clang {
				25	namespace format {
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	26	namespace {
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	27
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	28	// FIXME: Move helper string functions to where it makes sense.
				29
				30	unsigned getOctalLength(StringRef Text) {
				31	unsigned I = 1;
				32	while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
				33	++I;
				34	}
				35	return I;
				36	}
				37
				38	unsigned getHexLength(StringRef Text) {
				39	unsigned I = 2; // Point after '\x'.
				40	while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') \|\|
				41	(Text[I] >= 'a' && Text[I] <= 'f') \|\|
				42	(Text[I] >= 'A' && Text[I] <= 'F'))) {
				43	++I;
				44	}
				45	return I;
				46	}
				47
				48	unsigned getEscapeSequenceLength(StringRef Text) {
				49	assert(Text[0] == '\\');
				50	if (Text.size() < 2)
				51	return 1;
				52
				53	switch (Text[1]) {
				54	case 'u':
				55	return 6;
				56	case 'U':
				57	return 10;
				58	case 'x':
				59	return getHexLength(Text);
				60	default:
				61	if (Text[1] >= '0' && Text[1] <= '7')
				62	return getOctalLength(Text);
				63	return 2;
				64	}
				65	}
				66
				67	StringRef::size_type getStartOfCharacter(StringRef Text,
				68	StringRef::size_type Offset) {
				69	StringRef::size_type NextEscape = Text.find('\\');
				70	while (NextEscape != StringRef::npos && NextEscape < Offset) {
				71	StringRef::size_type SequenceLength =
				72	getEscapeSequenceLength(Text.substr(NextEscape));
				73	if (Offset < NextEscape + SequenceLength)
				74	return NextEscape;
				75	NextEscape = Text.find('\\', NextEscape + SequenceLength);
				76	}
				77	return Offset;
				78	}
				79
				80	BreakableToken::Split getCommentSplit(StringRef Text,
				81	unsigned ContentStartColumn,
				82	unsigned ColumnLimit) {
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	83	if (ColumnLimit <= ContentStartColumn + 1)
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	84	return BreakableToken::Split(StringRef::npos, 0);
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	85
				86	unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
				87	StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
				88	if (SpaceOffset == StringRef::npos \|\|
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	89	// Don't break at leading whitespace.
Manuel Klimek	be9ed77	2013-05-29 22:06:18 +0000	[diff] [blame^]	90	Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
				91	// Make sure that we don't break at leading whitespace that
				92	// reaches past MaxSplit.
				93	StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(" ");
				94	if (FirstNonWhitespace == StringRef::npos)
				95	// If the comment is only whitespace, we cannot split.
				96	return BreakableToken::Split(StringRef::npos, 0);
				97	SpaceOffset =
				98	Text.find(' ', std::max<unsigned>(MaxSplit, FirstNonWhitespace));
				99	}
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	100	if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
				101	StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
				102	StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
				103	return BreakableToken::Split(BeforeCut.size(),
				104	AfterCut.begin() - BeforeCut.end());
				105	}
				106	return BreakableToken::Split(StringRef::npos, 0);
				107	}
				108
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	109	BreakableToken::Split getStringSplit(StringRef Text,
				110	unsigned ContentStartColumn,
				111	unsigned ColumnLimit) {
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	112
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	113	if (ColumnLimit <= ContentStartColumn)
				114	return BreakableToken::Split(StringRef::npos, 0);
				115	unsigned MaxSplit = ColumnLimit - ContentStartColumn;
				116	// FIXME: Reduce unit test case.
				117	if (Text.empty())
				118	return BreakableToken::Split(StringRef::npos, 0);
				119	MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
				120	StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
				121	if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
				122	return BreakableToken::Split(SpaceOffset + 1, 0);
				123	StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
				124	if (SlashOffset != StringRef::npos && SlashOffset != 0)
				125	return BreakableToken::Split(SlashOffset + 1, 0);
				126	StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
				127	if (SplitPoint == StringRef::npos \|\| SplitPoint == 0)
				128	return BreakableToken::Split(StringRef::npos, 0);
				129	return BreakableToken::Split(SplitPoint, 0);
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	130	}
				131
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	132	} // namespace
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	133
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	134	unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	135
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	136	unsigned
				137	BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
				138	unsigned TailOffset) const {
				139	return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
				140	TailOffset;
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	141	}
				142
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	143	void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
				144	unsigned TailOffset, Split Split,
				145	bool InPPDirective,
				146	WhitespaceManager &Whitespaces) {
				147	Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
				148	Split.second, Postfix, Prefix, InPPDirective,
				149	StartColumn);
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	150	}
				151
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	152	BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
				153	unsigned StartColumn,
				154	StringRef Prefix,
				155	StringRef Postfix)
				156	: BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
				157	Postfix(Postfix) {
				158	assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
				159	Line = Tok.TokenText.substr(
				160	Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	161	}
				162
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	163	BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
				164	unsigned StartColumn)
				165	: BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
				166
				167	BreakableToken::Split
				168	BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
				169	unsigned ColumnLimit) const {
				170	return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	171	}
				172
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	173	static StringRef getLineCommentPrefix(StringRef Comment) {
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	174	const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	175	for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
Alexander Kornienko	919398b	2013-04-17 17:34:05 +0000	[diff] [blame]	176	if (Comment.startswith(KnownPrefixes[i]))
				177	return KnownPrefixes[i];
				178	return "";
				179	}
				180
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	181	BreakableLineComment::BreakableLineComment(const FormatToken &Token,
				182	unsigned StartColumn)
				183	: BreakableSingleLineToken(Token, StartColumn,
				184	getLineCommentPrefix(Token.TokenText), "") {}
				185
				186	BreakableToken::Split
				187	BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
				188	unsigned ColumnLimit) const {
				189	return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
				190	ColumnLimit);
				191	}
				192
				193	BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
				194	const FormatToken &Token,
				195	unsigned StartColumn,
				196	unsigned OriginalStartColumn,
				197	bool FirstInLine)
				198	: BreakableToken(Token) {
				199	StringRef TokenText(Token.TokenText);
				200	assert(TokenText.startswith("/") && TokenText.endswith("/"));
				201	TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
				202
				203	int IndentDelta = StartColumn - OriginalStartColumn;
				204	bool NeedsStar = true;
				205	LeadingWhitespace.resize(Lines.size());
				206	StartOfLineColumn.resize(Lines.size());
				207	if (Lines.size() == 1 && !FirstInLine) {
				208	// Comments for which FirstInLine is false can start on arbitrary column,
				209	// and available horizontal space can be too small to align consecutive
				210	// lines with the first one.
				211	// FIXME: We could, probably, align them to current indentation level, but
				212	// now we just wrap them without stars.
				213	NeedsStar = false;
				214	}
				215	StartOfLineColumn[0] = StartColumn + 2;
				216	for (size_t i = 1; i < Lines.size(); ++i) {
				217	adjustWhitespace(Style, i, IndentDelta);
				218	if (Lines[i].empty())
				219	// If the last line is empty, the closing "*/" will have a star.
				220	NeedsStar = NeedsStar && i + 1 == Lines.size();
				221	else
				222	NeedsStar = NeedsStar && Lines[i][0] == '*';
				223	}
				224	Decoration = NeedsStar ? "* " : "";
				225	IndentAtLineBreak = StartOfLineColumn[0] + 1;
				226	for (size_t i = 1; i < Lines.size(); ++i) {
				227	if (Lines[i].empty()) {
				228	if (!NeedsStar && i + 1 != Lines.size())
				229	// For all but the last line (which always ends in */), set the
				230	// start column to 0 if they're empty, so we do not insert
				231	// trailing whitespace anywhere.
				232	StartOfLineColumn[i] = 0;
				233	continue;
				234	}
				235	if (NeedsStar) {
				236	// The first line already excludes the star.
				237	// For all other lines, adjust the line to exclude the star and
				238	// (optionally) the first whitespace.
				239	int Offset = Lines[i].startswith("* ") ? 2 : 1;
				240	StartOfLineColumn[i] += Offset;
				241	Lines[i] = Lines[i].substr(Offset);
				242	LeadingWhitespace[i] += Offset;
				243	}
				244	IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
				245	}
				246	DEBUG({
				247	for (size_t i = 0; i < Lines.size(); ++i) {
				248	llvm::dbgs() << i << " \|" << Lines[i] << "\| " << LeadingWhitespace[i]
				249	<< "\n";
				250	}
				251	});
				252	}
				253
				254	void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
				255	unsigned LineIndex,
				256	int IndentDelta) {
				257	// Calculate the end of the non-whitespace text in the previous line.
				258	size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
				259	if (EndOfPreviousLine == StringRef::npos)
				260	EndOfPreviousLine = 0;
				261	else
				262	++EndOfPreviousLine;
				263	// Calculate the start of the non-whitespace text in the current line.
				264	size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
				265	if (StartOfLine == StringRef::npos)
				266	StartOfLine = Lines[LineIndex].size();
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	267
				268	// Adjust Lines to only contain relevant text.
				269	Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
				270	Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
				271	// Adjust LeadingWhitespace to account all whitespace between the lines
				272	// to the current line.
				273	LeadingWhitespace[LineIndex] =
				274	Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
Manuel Klimek	d63312b	2013-05-28 10:01:59 +0000	[diff] [blame]	275
				276	// FIXME: We currently count tabs as 1 character. To solve this, we need to
				277	// get the correct indentation width of the start of the comment, which
				278	// requires correct counting of the tab expansions before the comment, and
				279	// a configurable tab width. Since the current implementation only breaks
				280	// if leading tabs are intermixed with spaces, that is not a high priority.
				281
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	282	// Adjust the start column uniformly accross all lines.
Manuel Klimek	d63312b	2013-05-28 10:01:59 +0000	[diff] [blame]	283	StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	284	}
				285
				286	unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
				287
				288	unsigned
				289	BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
				290	unsigned TailOffset) const {
				291	return getContentStartColumn(LineIndex, TailOffset) +
				292	(Lines[LineIndex].size() - TailOffset) +
				293	// The last line gets a "*/" postfix.
				294	(LineIndex + 1 == Lines.size() ? 2 : 0);
				295	}
				296
				297	BreakableToken::Split
				298	BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
				299	unsigned ColumnLimit) const {
				300	return getCommentSplit(Lines[LineIndex].substr(TailOffset),
				301	getContentStartColumn(LineIndex, TailOffset),
				302	ColumnLimit);
				303	}
				304
				305	void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
				306	Split Split, bool InPPDirective,
				307	WhitespaceManager &Whitespaces) {
				308	StringRef Text = Lines[LineIndex].substr(TailOffset);
				309	StringRef Prefix = Decoration;
				310	if (LineIndex + 1 == Lines.size() &&
				311	Text.size() == Split.first + Split.second) {
				312	// For the last line we need to break before "/", but not to add " ".
				313	Prefix = "";
				314	}
				315
				316	unsigned BreakOffsetInToken =
				317	Text.data() - Tok.TokenText.data() + Split.first;
				318	unsigned CharsToRemove = Split.second;
				319	Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
				320	InPPDirective, IndentAtLineBreak - Decoration.size());
				321	}
				322
				323	void
				324	BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
				325	unsigned InPPDirective,
				326	WhitespaceManager &Whitespaces) {
				327	if (LineIndex == 0)
				328	return;
				329	StringRef Prefix = Decoration;
Manuel Klimek	c5cc4bf	2013-05-28 08:55:01 +0000	[diff] [blame]	330	if (Lines[LineIndex].empty()) {
				331	if (LineIndex + 1 == Lines.size()) {
				332	// If the last line is empty, we don't need a prefix, as the */ will line
				333	// up with the decoration (if it exists).
				334	Prefix = "";
				335	} else if (!Decoration.empty()) {
				336	// For other empty lines, if we do have a decoration, adapt it to not
				337	// contain a trailing whitespace.
				338	Prefix = Prefix.substr(0, 1);
				339	}
				340	}
Manuel Klimek	de008c0	2013-05-27 15:23:34 +0000	[diff] [blame]	341
				342	unsigned WhitespaceOffsetInToken =
				343	Lines[LineIndex].data() - Tok.TokenText.data() -
				344	LeadingWhitespace[LineIndex];
				345	Whitespaces.breakToken(
				346	Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
				347	InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
				348	}
				349
				350	unsigned
				351	BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
				352	unsigned TailOffset) const {
				353	// If we break, we always break at the predefined indent.
				354	if (TailOffset != 0)
				355	return IndentAtLineBreak;
				356	return StartOfLineColumn[LineIndex];
				357	}
				358
Alexander Kornienko	70ce788	2013-04-15 14:28:00 +0000	[diff] [blame]	359	} // namespace format
				360	} // namespace clang