Blame - clang/lib/Format/BreakableToken.cpp - toolchain/llvm-project

2013-04-15 14:28:00 +0000

[diff] [blame]

1

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

///

10

/// \file

11

/// \brief Contains implementation of BreakableToken class and classes derived

12

/// from it.

13

///

14

//===----------------------------------------------------------------------===//

15

16

#include "BreakableToken.h"

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

17

#include "ContinuationIndenter.h"

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

18

#include "clang/Basic/CharInfo.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

19

#include "clang/Format/Format.h"

Alexander Kornienko

2013-04-17 17:34:05 +0000

[diff] [blame]

20

#include "llvm/ADT/STLExtras.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

21

#include "llvm/Support/Debug.h"

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

22

#include <algorithm>

23

Chandler Carruth

1034666

2014-04-22 03:17:02 +0000

[diff] [blame]

24

#define DEBUG_TYPE "format-token-breaker"

25

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

namespace clang {

namespace format {

Daniel Jasper

2013-10-30 07:36:40 +0000

[diff] [blame]

29

// The set of characters this file treats as blanks: space, horizontal tab,
// vertical tab, form feed and carriage return. Newline is not part of the set.
static const char *const Blanks = " \t\v\f\r";

Alexander Kornienko

2013-06-20 13:58:37 +0000

[diff] [blame]

30

/// Returns true if \p C is a space, horizontal tab, vertical tab, form feed or
/// carriage return; returns false for every other character, including
/// newline.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

43

/// Returns the longest leading piece of \p Comment that consists of one of the
/// known line comment markers followed by any number of spaces. Returns an
/// empty StringRef if \p Comment starts with none of the known markers.
static StringRef getLineCommentIndentPrefix(StringRef Comment) {
  static const char *const KnownPrefixes[] = {
      "///<", "//!<", "///", "//", "//!"};
  StringRef Best;
  for (StringRef Marker : KnownPrefixes) {
    if (!Comment.startswith(Marker))
      continue;
    // Extend the match over the run of spaces that follows the marker.
    size_t End = Marker.size();
    while (End < Comment.size() && Comment[End] == ' ')
      ++End;
    // Several markers may match (e.g. "//" and "///"); keep the longest.
    if (End > Best.size())
      Best = Comment.substr(0, End);
  }
  return Best;
}

58

Craig Topper

bfb5c40

2013-07-01 03:38:29 +0000

[diff] [blame]

59

/// Looks for a blank at which the comment text \p Text, whose first character
/// is placed at \p ContentStartColumn, can be broken with respect to
/// \p ColumnLimit.
///
/// The last blank within the available width is preferred. If there is none,
/// or if only leading whitespace precedes it, the first blank at or after the
/// available width (and after the leading whitespace) is used instead.
///
/// \returns a pair (number of bytes of \p Text kept before the break once
/// trailing blanks are trimmed, number of blank bytes removed at the break),
/// or (StringRef::npos, 0) if no break position was found.
static BreakableToken::Split getCommentSplit(StringRef Text,
                                             unsigned ContentStartColumn,
                                             unsigned ColumnLimit,
                                             unsigned TabWidth,
                                             encoding::Encoding Encoding) {
  if (ColumnLimit <= ContentStartColumn + 1)
    return BreakableToken::Split(StringRef::npos, 0);

  unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
  unsigned MaxSplitBytes = 0;

  // Translate the column budget (MaxSplit) into a byte offset into Text
  // (MaxSplitBytes), advancing one code point at a time.
  for (unsigned NumChars = 0;
       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
    unsigned BytesInChar =
        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
    // Measure each code point at the column it actually occupies, i.e. the
    // content start plus the columns consumed so far. The width of a tab
    // depends on its column, so measuring every code point at
    // ContentStartColumn miscounts tabs that are not the first character.
    NumChars += encoding::columnWidthWithTabs(
        Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
        TabWidth, Encoding);
    MaxSplitBytes += BytesInChar;
  }

  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
  if (SpaceOffset == StringRef::npos ||
      // Don't break at leading whitespace.
      Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
    // Make sure that we don't break at leading whitespace that
    // reaches past MaxSplit.
    StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
    if (FirstNonWhitespace == StringRef::npos)
      // If the comment is only whitespace, we cannot split.
      return BreakableToken::Split(StringRef::npos, 0);
    SpaceOffset = Text.find_first_of(
        Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
  }
  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
    // Swallow the whole run of blanks around the chosen position.
    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
    StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
    return BreakableToken::Split(BeforeCut.size(),
                                 AfterCut.begin() - BeforeCut.end());
  }
  return BreakableToken::Split(StringRef::npos, 0);
}

101

Daniel Jasper

b05a81d

2014-05-09 13:11:16 +0000

[diff] [blame]

102

/// Looks for a position at which the string literal content \p Text can be
/// broken, given that \p UsedColumns columns are already taken and the line
/// may not exceed \p ColumnLimit.
///
/// Candidate positions are tried in this order: after the last blank, after
/// the last '/', after the last single-byte non-alphanumeric character, and
/// finally after the last code point or escape sequence that still fits.
/// Candidates at offset 0 are not used.
///
/// \returns (byte offset of the break, 0), or (StringRef::npos, 0) if there is
/// no usable break position.
static BreakableToken::Split
getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
               unsigned TabWidth, encoding::Encoding Encoding) {
  // FIXME: Reduce unit test case.
  if (Text.empty() || ColumnLimit <= UsedColumns)
    return BreakableToken::Split(StringRef::npos, 0);

  const unsigned MaxSplit = ColumnLimit - UsedColumns;
  StringRef::size_type LastBlank = 0;
  StringRef::size_type LastSlash = 0;
  StringRef::size_type LastNonAlnum = 0;
  StringRef::size_type Consumed = 0;
  unsigned Width = 0;
  while (true) {
    const char Head = Text[0];
    unsigned Step;
    if (Head == '\\') {
      // An escape sequence is consumed as a unit; each of its bytes counts as
      // one column.
      Step = encoding::getEscapeSequenceLength(Text);
      Width += Step;
    } else {
      Step = encoding::getCodePointNumBytes(Head, Encoding);
      Width += encoding::columnWidthWithTabs(
          Text.substr(0, Step), UsedColumns + Width, TabWidth, Encoding);
    }

    // Stop once the budget is exceeded or the final element was reached.
    if (Width > MaxSplit || Text.size() <= Step)
      break;

    if (IsBlank(Head))
      LastBlank = Consumed;
    if (Head == '/')
      LastSlash = Consumed;
    if (Step == 1 && !isAlphanumeric(Head))
      LastNonAlnum = Consumed;

    Consumed += Step;
    Text = Text.substr(Step);
  }

  // The "+ 1" places the break right after the remembered character.
  if (LastBlank != 0)
    return BreakableToken::Split(LastBlank + 1, 0);
  if (LastSlash != 0)
    return BreakableToken::Split(LastSlash + 1, 0);
  if (LastNonAlnum != 0)
    return BreakableToken::Split(LastNonAlnum + 1, 0);
  if (Consumed != 0)
    return BreakableToken::Split(Consumed, 0);
  return BreakableToken::Split(StringRef::npos, 0);
}

150

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

151

bool switchesFormatting(const FormatToken &Token) {

152

assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&

153

"formatting regions are switched by comment tokens");

154

StringRef Content = Token.TokenText.substr(2).ltrim();

155

return Content.startswith("clang-format on") ||

156

Content.startswith("clang-format off");

}

unsigned

BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,

161

Split Split) const {

162

// Example: consider the content

163

// lala lala

164

// - RemainingTokenColumns is the original number of columns, 10;

165

// - Split is (4, 2), denoting the two spaces between the two words;

166

//

167

// We compute the number of columns when the split is compressed into a single

168

// space, like:

169

// lala lala

170

return RemainingTokenColumns + 1 - Split.second;

171

}

172

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

173

/// Always reports exactly one line.
unsigned BreakableSingleLineToken::getLineCount() const { return 1; }

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

174

Alexander Kornienko

dd7ece5

2013-06-07 16:02:52 +0000

[diff] [blame]

175

unsigned BreakableSingleLineToken::getLineLengthAfterSplit(

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

176

unsigned LineIndex, unsigned TailOffset,

177

StringRef::size_type Length) const {

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

178

return StartColumn + Prefix.size() + Postfix.size() +

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

179

encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),

Alexander Kornienko

2013-09-05 14:08:34 +0000

[diff] [blame]

180

StartColumn + Prefix.size(),

181

Style.TabWidth, Encoding);

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

182

}

183

Alexander Kornienko

2013-06-14 11:46:10 +0000

[diff] [blame]

184

/// Stores the start column, prefix and postfix, and sets Line to the text of
/// \p Tok without its leading \p Prefix and trailing \p Postfix. The token
/// text is asserted to start with \p Prefix and end with \p Postfix.
BreakableSingleLineToken::BreakableSingleLineToken(
    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
    const FormatStyle &Style)
    : BreakableToken(Tok, InPPDirective, Encoding, Style),
      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
  const StringRef FullText = Tok.TokenText;
  assert(FullText.startswith(Prefix) && FullText.endswith(Postfix));
  // Whatever lies between the prefix and the postfix is the breakable content.
  const size_t ContentLength = FullText.size() - Prefix.size() - Postfix.size();
  Line = FullText.substr(Prefix.size(), ContentLength);
}

194

Alexander Kornienko

2013-09-16 20:20:49 +0000

[diff] [blame]

195

/// Forwards every argument unchanged to the BreakableSingleLineToken
/// constructor; no additional state is set up here.
BreakableStringLiteral::BreakableStringLiteral(
    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
    const FormatStyle &Style)
    : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix,
                               InPPDirective, Encoding, Style) {
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

201

202

/// Computes a split for the part of the literal that starts at \p TailOffset.
/// The columns taken by the start column, the prefix and the postfix count as
/// already used. \p LineIndex and \p CommentPragmasRegex are not used.
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit,
                                 llvm::Regex &CommentPragmasRegex) const {
  const StringRef Tail = Line.substr(TailOffset);
  const unsigned UsedColumns = StartColumn + Prefix.size() + Postfix.size();
  return getStringSplit(Tail, UsedColumns, ColumnLimit, Style.TabWidth,
                        Encoding);
}

210

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

211

void BreakableStringLiteral::insertBreak(unsigned LineIndex,

212

unsigned TailOffset, Split Split,

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

213

WhitespaceManager &Whitespaces) {

214

Whitespaces.replaceWhitespaceInToken(

215

Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,

Alexander Kornienko

d4fa2e6

2017-04-11 09:55:00 +0000

[diff] [blame]

216

Prefix, InPPDirective, 1, StartColumn);

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

217

}

218

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

219

/// Initializes the BreakableToken base with \p Token, \p InPPDirective,
/// \p Encoding and \p Style, and records \p StartColumn. Nothing else is set
/// up here.
BreakableComment::BreakableComment(const FormatToken &Token,
                                   unsigned StartColumn, bool InPPDirective,
                                   encoding::Encoding Encoding,
                                   const FormatStyle &Style)
    : BreakableToken(Token, InPPDirective, Encoding, Style),
      StartColumn(StartColumn) {
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

226

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

227

/// Returns the number of entries in Lines.
unsigned BreakableComment::getLineCount() const { return Lines.size(); }

228

Krasimir Georgiev

17725d8

2017-03-08 08:55:12 +0000

[diff] [blame]

229

/// Computes a split for the content of line \p LineIndex starting at
/// \p TailOffset. Lines whose content matches \p CommentPragmasRegex are never
/// split; (StringRef::npos, 0) is returned for them.
BreakableToken::Split
BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit,
                           llvm::Regex &CommentPragmasRegex) const {
  const StringRef LineContent = Content[LineIndex];
  // Don't break lines matching the comment pragmas regex.
  if (CommentPragmasRegex.match(LineContent))
    return Split(StringRef::npos, 0);
  const unsigned ContentStart = getContentStartColumn(LineIndex, TailOffset);
  return getCommentSplit(LineContent.substr(TailOffset), ContentStart,
                         ColumnLimit, Style.TabWidth, Encoding);
}

240

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

241

void BreakableComment::compressWhitespace(unsigned LineIndex,

242

unsigned TailOffset, Split Split,

243

WhitespaceManager &Whitespaces) {

244

StringRef Text = Content[LineIndex].substr(TailOffset);

245

// Text is relative to the content line, but Whitespaces operates relative to

246

// the start of the corresponding token, so compute the start of the Split

247

// that needs to be compressed into a single space relative to the start of

248

// its token.

249

unsigned BreakOffsetInToken =

250

Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

251

unsigned CharsToRemove = Split.second;

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

252

Whitespaces.replaceWhitespaceInToken(

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

253

tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

254

/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);

Alexander Kornienko

875395f

2013-11-12 17:50:13 +0000

[diff] [blame]

255

}

256

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

257

/// Determines how much of \p Text can be moved up to the end of the previous
/// line, which ends at \p PreviousEndColumn and is joined to the moved text
/// with \p ReflowPrefix.
///
/// \returns the split after the part that can be moved up, or
/// (StringRef::npos, 0) if nothing can be moved without exceeding
/// \p ColumnLimit.
BreakableToken::Split
BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
                                 unsigned PreviousEndColumn,
                                 unsigned ColumnLimit) const {
  const unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
  const StringRef TrimmedText = Text.rtrim(Blanks);
  // Width of the resulting line if all of Text (minus trailing blanks) were
  // moved up to start at ReflowStartColumn.
  const unsigned FullWidth =
      ReflowStartColumn +
      encoding::columnWidthWithTabs(TrimmedText, ReflowStartColumn,
                                    Style.TabWidth, Encoding);

  Split ReflowSplit(StringRef::npos, 0);
  if (FullWidth <= ColumnLimit) {
    // Everything fits: the split sits right after the text and covers the
    // trailing blanks.
    ReflowSplit = Split(TrimmedText.size(), Text.size() - TrimmedText.size());
  } else {
    // Otherwise take the largest piece that fits after ReflowStartColumn.
    ReflowSplit = getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
                                  Style.TabWidth, Encoding);
  }
  if (ReflowSplit.first == StringRef::npos)
    return Split(StringRef::npos, 0);

  // Keeping an over-long line is acceptable when it cannot be broken into
  // smaller pieces (e.g. its first word alone exceeds the limit), but moving
  // such a word up is not. So measure the previous line as it would look
  // after the reflow and accept the split only if it stays within the limit.
  const StringRef::size_type ReflownWidth =
      PreviousEndColumn + ReflowPrefix.size() +
      encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
                                    ReflowStartColumn, Style.TabWidth,
                                    Encoding);
  if (ReflownWidth > ColumnLimit)
    return Split(StringRef::npos, 0);
  return ReflowSplit;
}

294

295

/// Returns the token stored in Tokens for line \p LineIndex, or Tok if that
/// entry is null.
const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
  if (const FormatToken *LineToken = Tokens[LineIndex])
    return *LineToken;
  return Tok;
}

298

299

/// Heuristically decides whether \p Content may be reflown. After surrounding
/// blanks are trimmed, the content must:
///  - be at least two characters long,
///  - not start with one of the special-meaning prefixes below,
///  - not end with a backslash, and
///  - have a non-punctuation character in its first or second position.
static bool mayReflowContent(StringRef Content) {
  Content = Content.trim(Blanks);
  // Lines starting with '@' commonly have special meaning.
  static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
      "@", "TODO", "FIXME", "XXX"};

  if (Content.size() < 2)
    return false;
  for (StringRef Prefix : kSpecialMeaningPrefixes) {
    if (Content.startswith(Prefix))
      return false;
  }
  if (Content.endswith("\\"))
    return false;
  // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is true,
  // then the first code point must be 1 byte long.
  return !isPunctuation(Content[0]) || !isPunctuation(Content[1]);
}

320

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

321

/// Splits the text between "/*" and "*/" of \p Token into lines and derives
/// the per-line content and content columns, the decoration shared by the
/// lines after the first ("* ", a prefix of it, or nothing), the column the
/// decoration is aligned to, and the indent to use at inserted line breaks.
BreakableBlockComment::BreakableBlockComment(
    const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  assert(Tok.is(TT_BlockComment) &&
         "block comment section must start with a block comment");

  StringRef TokenText(Tok.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  // Drop the two-character opener and closer before splitting into lines.
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
  const size_t NumLines = Lines.size();

  int IndentDelta = StartColumn - OriginalStartColumn;
  Content.resize(NumLines);
  Content[0] = Lines[0];
  ContentColumn.resize(NumLines);
  // The first line's content starts right after the initial '/*'.
  ContentColumn[0] = StartColumn + 2;
  Tokens.resize(NumLines);
  for (size_t LineIdx = 1; LineIdx < NumLines; ++LineIdx)
    adjustWhitespace(LineIdx, IndentDelta);

  // By default decorations line up with the star of the opening "/*", which
  // is one column after the start of the comment.
  DecorationColumn = StartColumn + 1;

  // Account for comment decoration patterns like this:
  //
  // /*
  // ** blah blah blah
  // */
  const bool SecondLineStartsWithDoubleStar =
      NumLines >= 2 && Content[1].startswith("**") &&
      static_cast<unsigned>(ContentColumn[1]) == StartColumn;
  if (SecondLineStartsWithDoubleStar)
    DecorationColumn = StartColumn;

  Decoration = "* ";
  if (NumLines == 1 && !FirstInLine) {
    // Comments for which FirstInLine is false can start on arbitrary column,
    // and available horizontal space can be too small to align consecutive
    // lines with the first one.
    // FIXME: We could, probably, align them to current indentation level, but
    // now we just wrap them without stars.
    Decoration = "";
  }

  // Shrink the decoration to the longest prefix of "* " that the lines after
  // the first have in common.
  for (size_t LineIdx = 1; LineIdx < NumLines && !Decoration.empty();
       ++LineIdx) {
    const bool IsLastLine = LineIdx + 1 == NumLines;
    const StringRef LineText = Content[LineIdx];
    // If the last line is empty, the closing "*/" will have a star.
    if (IsLastLine && LineText.empty())
      break;
    // A non-final line that is itself a prefix of the decoration does not
    // constrain it.
    if (!IsLastLine && !LineText.empty() && Decoration.startswith(LineText))
      continue;
    while (!LineText.startswith(Decoration))
      Decoration = Decoration.substr(0, Decoration.size() - 1);
  }

  LastLineNeedsDecoration = true;
  IndentAtLineBreak = ContentColumn[0] + 1;
  for (size_t LineIdx = 1; LineIdx < NumLines; ++LineIdx) {
    if (Content[LineIdx].empty()) {
      const bool IsLastLine = LineIdx + 1 == NumLines;
      if (IsLastLine) {
        // Empty last line means that we already have a star as a part of the
        // trailing */. We also need to preserve whitespace, so that */ is
        // correctly indented.
        LastLineNeedsDecoration = false;
        // Align the star in the last '*/' with the stars on the previous
        // lines.
        if (NumLines >= 2 && !Decoration.empty())
          ContentColumn[LineIdx] = DecorationColumn;
      } else if (Decoration.empty()) {
        // For all other lines, set the start column to 0 if they're empty, so
        // we do not insert trailing whitespace anywhere.
        ContentColumn[LineIdx] = 0;
      }
      continue;
    }

    // The first line already excludes the star.
    // The last line excludes the star if LastLineNeedsDecoration is false.
    // For all other lines, adjust the line to exclude the star and
    // (optionally) the first whitespace.
    unsigned DecorationSize = Decoration.size();
    if (Decoration.startswith(Content[LineIdx]))
      DecorationSize = Content[LineIdx].size();
    if (DecorationSize)
      ContentColumn[LineIdx] = DecorationColumn + DecorationSize;
    Content[LineIdx] = Content[LineIdx].substr(DecorationSize);
    if (!Decoration.startswith(Content[LineIdx]))
      IndentAtLineBreak = std::min<int>(IndentAtLineBreak,
                                        std::max(0, ContentColumn[LineIdx]));
  }
  IndentAtLineBreak =
      std::max<unsigned>(IndentAtLineBreak, Decoration.size());

  DEBUG({
    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
    for (size_t i = 0; i < Lines.size(); ++i) {
      llvm::dbgs() << i << " |" << Content[i] << "| "
                   << "CC=" << ContentColumn[i] << "| "
                   << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
    }
  });
}

Alexander Kornienko

2013-09-05 14:08:34 +0000

[diff] [blame]

426

void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

427

int IndentDelta) {

Alexander Kornienko

2013-06-14 11:46:10 +0000

[diff] [blame]

428

// When in a preprocessor directive, the trailing backslash in a block comment

429

// is not needed, but can serve a purpose of uniformity with necessary escaped

430

// newlines outside the comment. In this case we remove it here before

431

// trimming the trailing whitespace. The backslash will be re-added later when

432

// inserting a line break.

433

size_t EndOfPreviousLine = Lines[LineIndex - 1].size();

434

if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))

435

--EndOfPreviousLine;

436

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

437

// Calculate the end of the non-whitespace text in the previous line.

Alexander Kornienko

2013-06-14 11:46:10 +0000

[diff] [blame]

438

EndOfPreviousLine =

Alexander Kornienko

2013-06-20 13:58:37 +0000

[diff] [blame]

439

Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

440

if (EndOfPreviousLine == StringRef::npos)

441

EndOfPreviousLine = 0;

442

else

443

++EndOfPreviousLine;

444

// Calculate the start of the non-whitespace text in the current line.

Alexander Kornienko

2013-06-20 13:58:37 +0000

[diff] [blame]

445

size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

446

if (StartOfLine == StringRef::npos)

Daniel Jasper

d6e6188

2015-06-17 12:23:15 +0000

[diff] [blame]

447

StartOfLine = Lines[LineIndex].rtrim("\r\n").size();

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

448

Alexander Kornienko

2013-09-05 14:08:34 +0000

[diff] [blame]

449

StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

450

// Adjust Lines to only contain relevant text.

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

451

size_t PreviousContentOffset =

452

Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();

453

Content[LineIndex - 1] = Lines[LineIndex - 1].substr(

454

PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);

455

Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);

Manuel Klimek

34d1515

2013-05-28 10:01:59 +0000

[diff] [blame]

456

Alp Toker

f6a24ce

2013-12-05 16:25:25 +0000

[diff] [blame]

457

// Adjust the start column uniformly across all lines.

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

458

ContentColumn[LineIndex] =

Alexander Kornienko

39856b7

2013-09-10 09:38:25 +0000

[diff] [blame]

459

encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +

Alexander Kornienko

67d9c8c

2014-04-17 16:12:46 +0000

[diff] [blame]

460

IndentDelta;

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

461

}

462

Alexander Kornienko

dd7ece5

2013-06-07 16:02:52 +0000

[diff] [blame]

463

unsigned BreakableBlockComment::getLineLengthAfterSplit(

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

464

unsigned LineIndex, unsigned TailOffset,

465

StringRef::size_type Length) const {

466

unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);

467

unsigned LineLength =

468

ContentStartColumn + encoding::columnWidthWithTabs(

469

Content[LineIndex].substr(TailOffset, Length),

470

ContentStartColumn, Style.TabWidth, Encoding);

471

// The last line gets a "*/" postfix.

472

if (LineIndex + 1 == Lines.size()) {

473

LineLength += 2;

474

// We never need a decoration when breaking just the trailing "*/" postfix.

475

// Note that checking that Length == 0 is not enough, since Length could

476

// also be StringRef::npos.

477

if (Content[LineIndex].substr(TailOffset, Length).empty()) {

478

LineLength -= Decoration.size();

479

}

480

}

481

return LineLength;

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

482

}

483

484

void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

Alexander Kornienko

2013-06-14 11:46:10 +0000

[diff] [blame]

485

Split Split,

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

486

WhitespaceManager &Whitespaces) {

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

487

StringRef Text = Content[LineIndex].substr(TailOffset);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

488

StringRef Prefix = Decoration;

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

489

// We need this to account for the case when we have a decoration "* " for all

490

// the lines except for the last one, where the star in "*/" acts as a

491

// decoration.

492

unsigned LocalIndentAtLineBreak = IndentAtLineBreak;

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

493

if (LineIndex + 1 == Lines.size() &&

494

Text.size() == Split.first + Split.second) {

495

// For the last line we need to break before "*/", but not to add "* ".

496

Prefix = "";

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

497

if (LocalIndentAtLineBreak >= 2)

498

LocalIndentAtLineBreak -= 2;

499

}

500

// The split offset is from the beginning of the line. Convert it to an offset

501

// from the beginning of the token text.

502

unsigned BreakOffsetInToken =

503

Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

504

unsigned CharsToRemove = Split.second;

505

assert(LocalIndentAtLineBreak >= Prefix.size());

506

Whitespaces.replaceWhitespaceInToken(

507

tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

508

InPPDirective, /*Newlines=*/1,

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

509

/*Spaces=*/LocalIndentAtLineBreak - Prefix.size());

510

}

511

512

/// Computes the reflow split for line \p LineIndex, i.e. how much of its
/// blank-trimmed content may be joined to the previous line. Returns
/// Split(StringRef::npos, 0) when the line may not be reflown.
BreakableToken::Split BreakableBlockComment::getSplitBefore(
    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
    llvm::Regex &CommentPragmasRegex) const {
  if (mayReflow(LineIndex, CommentPragmasRegex)) {
    // Offsets in the returned split are relative to the trimmed content.
    const StringRef Trimmed = Content[LineIndex].ltrim(Blanks);
    return getReflowSplit(Trimmed, ReflowPrefix, PreviousEndColumn,
                          ColumnLimit);
  }
  return Split(StringRef::npos, 0);
}

unsigned BreakableBlockComment::getReflownColumn(

525

StringRef Content,

526

unsigned LineIndex,

527

unsigned PreviousEndColumn) const {

528

unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();

529

// If this is the last line, it will carry around its '*/' postfix.

530

unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);

531

// The line is composed of previous text, reflow prefix, reflown text and

532

// postfix.

533

unsigned ReflownColumn =

534

StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,

535

Style.TabWidth, Encoding) +

536

PostfixLength;

537

return ReflownColumn;

538

}

539

540

unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(

541

unsigned LineIndex, unsigned TailOffset,

542

unsigned PreviousEndColumn,

543

unsigned ColumnLimit,

544

Split SplitBefore) const {

Krasimir Georgiev

af1b962

2017-01-31 14:31:44 +0000

[diff] [blame]

545

if (SplitBefore.first == StringRef::npos ||

546

// Block comment line contents contain the trailing whitespace after the

547

// decoration, so the need of left trim. Note that this behavior is

548

// consistent with the breaking of block comments where the indentation of

549

// a broken line is uniform across all the lines of the block comment.

550

SplitBefore.first + SplitBefore.second <

551

Content[LineIndex].ltrim().size()) {

552

// A piece of line, not the whole, gets reflown.

553

return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

554

} else {

555

// The whole line gets reflown, need to check if we need to insert a break

556

// for the postfix or not.

557

StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

558

unsigned ReflownColumn =

559

getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);

560

if (ReflownColumn <= ColumnLimit) {

561

return ReflownColumn;

562

}

563

return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

564

}

565

}

566

void BreakableBlockComment::replaceWhitespaceBefore(

567

unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,

568

Split SplitBefore, WhitespaceManager &Whitespaces) {

569

if (LineIndex == 0) return;

570

StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

571

if (SplitBefore.first != StringRef::npos) {

572

// Here we need to reflow.

573

assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&

574

"Reflowing whitespace within a token");

575

// This is the offset of the end of the last line relative to the start of

576

// the token text in the token.

577

unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

578

Content[LineIndex - 1].size() -

579

tokenAt(LineIndex).TokenText.data();

580

unsigned WhitespaceLength = TrimmedContent.data() -

581

tokenAt(LineIndex).TokenText.data() -

582

WhitespaceOffsetInToken;

583

Whitespaces.replaceWhitespaceInToken(

584

tokenAt(LineIndex), WhitespaceOffsetInToken,

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

585

/*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",

586

/*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,

587

/*Spaces=*/0);

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

588

// Check if we need to also insert a break at the whitespace range.

589

// For this we first adapt the reflow split relative to the beginning of the

590

// content.

591

// Note that we don't need a penalty for this break, since it doesn't change

592

// the total number of lines.

593

Split BreakSplit = SplitBefore;

594

BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();

595

unsigned ReflownColumn =

596

getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);

597

if (ReflownColumn > ColumnLimit) {

598

insertBreak(LineIndex, 0, BreakSplit, Whitespaces);

599

}

600

return;

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

601

}

602

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

603

// Here no reflow with the previous line will happen.

604

// Fix the decoration of the line at LineIndex.

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

605

StringRef Prefix = Decoration;

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

606

if (Content[LineIndex].empty()) {

Manuel Klimek

281dcbe

2013-05-28 08:55:01 +0000

[diff] [blame]

607

if (LineIndex + 1 == Lines.size()) {

Alexander Kornienko

2013-07-08 14:12:07 +0000

[diff] [blame]

608

if (!LastLineNeedsDecoration) {

609

// If the last line was empty, we don't need a prefix, as the */ will

610

// line up with the decoration (if it exists).

611

Prefix = "";

612

}

Manuel Klimek

281dcbe

2013-05-28 08:55:01 +0000

[diff] [blame]

613

} else if (!Decoration.empty()) {

614

// For other empty lines, if we do have a decoration, adapt it to not

615

// contain a trailing whitespace.

616

Prefix = Prefix.substr(0, 1);

617

}

Daniel Jasper

51fb2b2

2013-05-30 06:40:07 +0000

[diff] [blame]

618

} else {

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

619

if (ContentColumn[LineIndex] == 1) {

Alexander Kornienko

2013-07-08 14:12:07 +0000

[diff] [blame]

620

// This line starts immediately after the decorating *.

Daniel Jasper

51fb2b2

2013-05-30 06:40:07 +0000

[diff] [blame]

621

Prefix = Prefix.substr(0, 1);

622

}

Manuel Klimek

281dcbe

2013-05-28 08:55:01 +0000

[diff] [blame]

623

}

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

624

// This is the offset of the end of the last line relative to the start of the

625

// token text in the token.

626

unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

627

Content[LineIndex - 1].size() -

628

tokenAt(LineIndex).TokenText.data();

629

unsigned WhitespaceLength = Content[LineIndex].data() -

630

tokenAt(LineIndex).TokenText.data() -

631

WhitespaceOffsetInToken;

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame]

632

Whitespaces.replaceWhitespaceInToken(

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

633

tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,

634

InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

635

}

636

Krasimir Georgiev

00c5c72

2017-02-02 15:32:19 +0000

[diff] [blame]

637

bool BreakableBlockComment::mayReflow(unsigned LineIndex,

638

llvm::Regex &CommentPragmasRegex) const {

639

// Content[LineIndex] may exclude the indent after the '*' decoration. In that

640

// case, we compute the start of the comment pragma manually.

641

StringRef IndentContent = Content[LineIndex];

642

if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {

643

IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);

644

}

645

return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&

646

mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&

647

!switchesFormatting(tokenAt(LineIndex));

648

}

649

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

650

unsigned

651

BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

652

unsigned TailOffset) const {

653

// If we break, we always break at the predefined indent.

654

if (TailOffset != 0)

655

return IndentAtLineBreak;

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

656

return std::max(0, ContentColumn[LineIndex]);

657

}

658

659

/// Builds a line comment section starting at \p Token.
///
/// Walks the chain of consecutive line comment tokens reachable through
/// FormatToken::Next, splits each token's text into lines and records for
/// every line: the owning token, the original and the (possibly
/// space-extended) comment prefix, the content after the prefix with trailing
/// blanks removed, and the original and adjusted content columns. The walk
/// stops after a token whose successor does not continue the section.
///
/// \p OriginalStartColumn and \p FirstInLine are accepted but not used by
/// this body.
BreakableLineCommentSection::BreakableLineCommentSection(
    const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  assert(Tok.is(TT_LineComment) &&
         "line comment section must start with a line comment");
  // Token owning the lines currently being added; stays null while the lines
  // of the first token are processed.
  FormatToken *LineTok = nullptr;
  for (const FormatToken *CurrentTok = &Tok;
       CurrentTok && CurrentTok->is(TT_LineComment);
       CurrentTok = CurrentTok->Next) {
    LastLineTok = LineTok;
    StringRef TokenText(CurrentTok->TokenText);
    assert(TokenText.startswith("//"));
    size_t FirstLineIndex = Lines.size();
    TokenText.split(Lines, "\n");
    // Keep all per-line vectors the same length as Lines.
    Content.resize(Lines.size());
    ContentColumn.resize(Lines.size());
    OriginalContentColumn.resize(Lines.size());
    Tokens.resize(Lines.size());
    Prefix.resize(Lines.size());
    OriginalPrefix.resize(Lines.size());
    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
      // A line that is not the first line of a multiline comment token
      // includes its indent. Strip it once here so that the prefix, the
      // character following the prefix and the content are all located in
      // the same string. Previously only the prefix detection saw the
      // trimmed line, and the subsequent indexing into the untrimmed line
      // was off by the width of the indent.
      Lines[i] = Lines[i].ltrim(Blanks);
      StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i]);
      assert(IndentPrefix.startswith("//"));
      OriginalPrefix[i] = Prefix[i] = IndentPrefix;
      // If the text starts right after a known prefix with an alphanumeric
      // character, the adjusted prefix gets a separating space.
      if (Lines[i].size() > Prefix[i].size() &&
          isAlphanumeric(Lines[i][Prefix[i].size()])) {
        if (Prefix[i] == "//")
          Prefix[i] = "// ";
        else if (Prefix[i] == "///")
          Prefix[i] = "/// ";
        else if (Prefix[i] == "//!")
          Prefix[i] = "//! ";
        else if (Prefix[i] == "///<")
          Prefix[i] = "///< ";
        else if (Prefix[i] == "//!<")
          Prefix[i] = "//!< ";
      }

      Tokens[i] = LineTok;
      Content[i] = Lines[i].substr(IndentPrefix.size());
      // Content column as found in the input (original prefix) ...
      OriginalContentColumn[i] =
          StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i],
                                                      StartColumn,
                                                      Style.TabWidth,
                                                      Encoding);
      // ... and as it will be after the prefix adjustment.
      ContentColumn[i] =
          StartColumn + encoding::columnWidthWithTabs(Prefix[i],
                                                      StartColumn,
                                                      Style.TabWidth,
                                                      Encoding);

      // Calculate the end of the non-whitespace text in this line and drop
      // everything after it. A line consisting only of blanks is kept as is.
      size_t EndOfLine = Content[i].find_last_not_of(Blanks);
      if (EndOfLine == StringRef::npos)
        EndOfLine = Content[i].size();
      else
        ++EndOfLine;
      Content[i] = Content[i].substr(0, EndOfLine);
    }
    LineTok = CurrentTok->Next;
    if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
      // A line comment section needs to be broken by a line comment that is
      // preceded by at least two newlines. Note that we put this break here
      // instead of breaking at a previous stage during parsing, since that
      // would split the contents of the enum into two unwrapped lines in this
      // example, which is undesirable:
      // enum A {
      //   a, // comment about a
      //
      //   // comment about b
      //   b
      // };
      //
      // FIXME: Consider putting separate line comment sections as children to
      // the unwrapped line instead.
      break;
    }
  }
}

unsigned BreakableLineCommentSection::getLineLengthAfterSplit(

747

unsigned LineIndex, unsigned TailOffset,

748

StringRef::size_type Length) const {

749

unsigned ContentStartColumn =

750

(TailOffset == 0 ? ContentColumn[LineIndex]

751

: OriginalContentColumn[LineIndex]);

752

return ContentStartColumn + encoding::columnWidthWithTabs(

753

Content[LineIndex].substr(TailOffset, Length),

754

ContentStartColumn, Style.TabWidth, Encoding);

755

}

756

757

void BreakableLineCommentSection::insertBreak(unsigned LineIndex,

758

unsigned TailOffset, Split Split,

759

WhitespaceManager &Whitespaces) {

760

StringRef Text = Content[LineIndex].substr(TailOffset);

761

// Compute the offset of the split relative to the beginning of the token

762

// text.

763

unsigned BreakOffsetInToken =

764

Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;

765

unsigned CharsToRemove = Split.second;

766

// Compute the size of the new indent, including the size of the new prefix of

767

// the newly broken line.

768

unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +

769

Prefix[LineIndex].size() -

770

OriginalPrefix[LineIndex].size();

771

assert(IndentAtLineBreak >= Prefix[LineIndex].size());

772

Whitespaces.replaceWhitespaceInToken(

773

tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

774

Prefix[LineIndex], InPPDirective, /*Newlines=*/1,

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

775

/*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());

776

}

777

778

/// Computes the reflow split for line \p LineIndex, i.e. how much of its
/// content may be joined to the previous line. Returns
/// Split(StringRef::npos, 0) when the line may not be reflown.
BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
    llvm::Regex &CommentPragmasRegex) const {
  if (mayReflow(LineIndex, CommentPragmasRegex)) {
    return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
                          ColumnLimit);
  }
  return Split(StringRef::npos, 0);
}

unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(

788

unsigned LineIndex, unsigned TailOffset,

789

unsigned PreviousEndColumn,

790

unsigned ColumnLimit,

791

Split SplitBefore) const {

792

if (SplitBefore.first == StringRef::npos ||

793

SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {

794

// A piece of line, not the whole line, gets reflown.

795

return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

796

} else {

797

// The whole line gets reflown.

798

unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();

799

return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],

StartColumn,

Style.TabWidth,

Encoding);

}

}

void BreakableLineCommentSection::replaceWhitespaceBefore(

807

unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,

808

Split SplitBefore, WhitespaceManager &Whitespaces) {

809

// If this is the first line of a token, we need to inform Whitespace Manager

810

// about it: either adapt the whitespace range preceding it, or mark it as an

811

// untouchable token.

812

// This happens for instance here:

813

// // line 1 \

814

// // line 2

815

if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {

816

if (SplitBefore.first != StringRef::npos) {

817

// Reflow happens between tokens. Replace the whitespace between the

818

// tokens by the empty string.

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

819

Whitespaces.replaceWhitespace(

820

*Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,

821

/*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

822

// Replace the indent and prefix of the token with the reflow prefix.

823

unsigned WhitespaceLength =

824

Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();

825

Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],

826

/*Offset=*/0,

827

/*ReplaceChars=*/WhitespaceLength,

828

/*PreviousPostfix=*/"",

829

/*CurrentPrefix=*/ReflowPrefix,

830

/*InPPDirective=*/false,

831

/*Newlines=*/0,

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

832

/*Spaces=*/0);

833

} else {

834

// This is the first line for the current token, but no reflow with the

835

// previous token is necessary. However, we still may need to adjust the

Krasimir Georgiev

2017-01-31 15:40:15 +0000

[diff] [blame]

836

// start column. Note that ContentColumn[LineIndex] is the expected

837

// content column after a possible update to the prefix, hence the prefix

838

// length change is included.

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

839

unsigned LineColumn =

840

ContentColumn[LineIndex] -

Krasimir Georgiev

2017-01-31 15:40:15 +0000

[diff] [blame]

841

(Content[LineIndex].data() - Lines[LineIndex].data()) +

842

(OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());

Krasimir Georgiev

13dbaa0

2017-02-01 10:10:04 +0000

[diff] [blame]

843

844

// We always want to create a replacement instead of adding an untouchable

845

// token, even if LineColumn is the same as the original column of the

846

// token. This is because WhitespaceManager doesn't align trailing

847

// comments if they are untouchable.

848

Whitespaces.replaceWhitespace(*Tokens[LineIndex],

849

/*Newlines=*/1,

850

/*Spaces=*/LineColumn,

851

/*StartOfTokenColumn=*/LineColumn,

852

/*InPPDirective=*/false);

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

853

}

Krasimir Georgiev

2017-01-31 15:40:15 +0000

[diff] [blame]

854

}

855

if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {

856

// Adjust the prefix if necessary.

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

857

858

// Take care of the space possibly introduced after a decoration.

859

assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&

Krasimir Georgiev

2017-01-31 15:40:15 +0000

[diff] [blame]

860

"Expecting a line comment prefix to differ from original by at most "

861

"a space");

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

862

Whitespaces.replaceWhitespaceInToken(

863

tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",

Daniel Jasper

2017-01-31 11:25:01 +0000

[diff] [blame]

864

/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

865

}

866

// Add a break after a reflow split has been introduced, if necessary.

867

// Note that this break doesn't need to be penalized, since it doesn't change

868

// the number of lines.

869

if (SplitBefore.first != StringRef::npos &&

870

SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {

871

insertBreak(LineIndex, 0, SplitBefore, Whitespaces);

}

}

/// Points \p State.NextToken at the token following LastLineTok. Leaves
/// \p State untouched when LastLineTok is null.
void BreakableLineCommentSection::updateNextToken(LineState &State) const {
  if (!LastLineTok)
    return;
  State.NextToken = LastLineTok->Next;
}

Krasimir Georgiev

2017-02-02 15:32:19 +0000

[diff] [blame]

881

bool BreakableLineCommentSection::mayReflow(

882

unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {

883

// Line comments have the indent as part of the prefix, so we need to

884

// recompute the start of the line.

885

StringRef IndentContent = Content[LineIndex];

886

if (Lines[LineIndex].startswith("//")) {

887

IndentContent = Lines[LineIndex].substr(2);

888

}

889

return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&

890

mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&

891

!switchesFormatting(tokenAt(LineIndex)) &&

892

OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];

893

}

894

Krasimir Georgiev

2017-01-25 13:58:58 +0000

[diff] [blame]

895

unsigned

896

BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,

897

unsigned TailOffset) const {

898

if (TailOffset != 0) {

899

return OriginalContentColumn[LineIndex];

900

}

901

return ContentColumn[LineIndex];

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

902

}

903

Alexander Kornienko