Blame - lib/Format/BreakableToken.cpp - fp2-dev/platform/external/clang

2013-04-15 14:28:00 +0000

[diff] [blame]

1

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

///

10

/// \file

11

/// \brief Contains implementation of BreakableToken class and classes derived

12

/// from it.

13

///

14

//===----------------------------------------------------------------------===//

15

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

16

#define DEBUG_TYPE "format-token-breaker"

17

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

18

#include "BreakableToken.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

19

#include "clang/Format/Format.h"

Alexander Kornienko

2013-04-17 17:34:05 +0000

[diff] [blame]

20

#include "llvm/ADT/STLExtras.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

21

#include "llvm/Support/Debug.h"

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

#include <algorithm>

namespace clang {

namespace format {

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

26

namespace {

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

27

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

28

/// \brief Finds a position at which comment text can be split at a space.
///
/// \param Text The remaining comment text to be split.
/// \param ContentStartColumn Column at which \p Text starts.
/// \param ColumnLimit The column limit to respect.
/// \param Encoding Encoding used to measure code points in \p Text.
/// \returns A Split whose first member is the length of the text kept before
/// the break (with trailing whitespace trimmed) and whose second member is the
/// number of bytes between that point and the first non-whitespace character
/// after the break. Returns (StringRef::npos, 0) if no split is possible.
BreakableToken::Split getCommentSplit(StringRef Text,
                                      unsigned ContentStartColumn,
                                      unsigned ColumnLimit,
                                      encoding::Encoding Encoding) {
  const BreakableToken::Split NoSplit(StringRef::npos, 0);

  // Not enough horizontal room to place any content.
  if (ContentStartColumn + 1 >= ColumnLimit)
    return NoSplit;

  const unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;

  // Translate the code point budget into a byte offset into Text.
  unsigned MaxSplitBytes = 0;
  unsigned CodePointsSeen = 0;
  while (CodePointsSeen < MaxSplit && MaxSplitBytes < Text.size()) {
    MaxSplitBytes +=
        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
    ++CodePointsSeen;
  }

  // Prefer the last space at or before the byte budget.
  StringRef::size_type BreakAt = Text.rfind(' ', MaxSplitBytes);

  // A candidate preceded only by spaces is leading whitespace; breaking there
  // is not allowed.
  const bool OnlyLeadingWhitespace =
      BreakAt == StringRef::npos ||
      Text.find_last_not_of(' ', BreakAt) == StringRef::npos;
  if (OnlyLeadingWhitespace) {
    // Look forward instead, skipping any leading whitespace that extends
    // beyond the budget.
    StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(" ");
    if (FirstNonWhitespace == StringRef::npos)
      return NoSplit; // Whitespace-only text cannot be split.
    BreakAt =
        Text.find(' ', std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
  }

  if (BreakAt == StringRef::npos || BreakAt == 0)
    return NoSplit;

  // Drop the whitespace surrounding the break position.
  StringRef Head = Text.substr(0, BreakAt).rtrim();
  StringRef Tail = Text.substr(BreakAt).ltrim();
  return BreakableToken::Split(Head.size(), Tail.begin() - Head.end());
}

64

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

65

/// \brief Finds a position at which the content of a string literal can be
/// split.
///
/// The text is scanned element by element, where an element is either an
/// escape sequence (starting with a backslash) or a single code point, until
/// the column budget is exhausted. Candidate split points are, in order of
/// preference: after the last space seen, after the last '/' seen, and
/// finally before the first element that no longer fits.
///
/// \param Text The remaining string content to be split.
/// \param ContentStartColumn Column at which \p Text starts.
/// \param ColumnLimit The column limit to respect.
/// \param Encoding Encoding used to measure code points in \p Text.
/// \returns A Split of (byte offset, 0), or (StringRef::npos, 0) if no split
/// point was found.
BreakableToken::Split getStringSplit(StringRef Text,
                                     unsigned ContentStartColumn,
                                     unsigned ColumnLimit,
                                     encoding::Encoding Encoding) {
  // FIXME: Reduce unit test case.
  if (Text.empty())
    return BreakableToken::Split(StringRef::npos, 0);
  if (ColumnLimit <= ContentStartColumn)
    return BreakableToken::Split(StringRef::npos, 0);
  // Never consume the whole text: at least one code point has to remain after
  // the split.
  unsigned MaxSplit =
      std::min<unsigned>(ColumnLimit - ContentStartColumn,
                         encoding::getCodePointCount(Text, Encoding) - 1);
  // An offset of 0 doubles as "not found" for the candidates below, so a
  // space or slash at the very beginning is never used as a split point.
  StringRef::size_type SpaceOffset = 0;
  StringRef::size_type SlashOffset = 0;
  StringRef::size_type SplitPoint = 0;
  for (unsigned Chars = 0;;) {
    unsigned Advance;
    if (Text[0] == '\\') {
      // Escape sequences are kept together and cost one column per byte.
      Advance = encoding::getEscapeSequenceLength(Text);
      Chars += Advance;
    } else {
      Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
      Chars += 1;
    }

    // Stop when the budget is exceeded. Also stop when the current element
    // reaches the end of the text: this keeps Text non-empty for the Text[0]
    // accesses above even if the code point count and the per-element byte
    // lengths disagree (e.g. for malformed multi-byte input).
    if (Chars > MaxSplit || Text.size() <= Advance)
      break;

    if (Text[0] == ' ')
      SpaceOffset = SplitPoint;
    if (Text[0] == '/')
      SlashOffset = SplitPoint;

    SplitPoint += Advance;
    Text = Text.substr(Advance);
  }

  // Split after the space or slash, i.e. keep it on the first line.
  if (SpaceOffset != 0)
    return BreakableToken::Split(SpaceOffset + 1, 0);
  if (SlashOffset != 0)
    return BreakableToken::Split(SlashOffset + 1, 0);
  if (SplitPoint != 0)
    return BreakableToken::Split(SplitPoint, 0);
  return BreakableToken::Split(StringRef::npos, 0);
}

110

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

111

} // namespace

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

112

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

113

/// \brief Returns the number of lines in this token, which is always one.
unsigned BreakableSingleLineToken::getLineCount() const {
  return 1;
}

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

114

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

115

unsigned BreakableSingleLineToken::getLineLengthAfterSplit(

116

unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

117

return StartColumn + Prefix.size() + Postfix.size() +

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

118

encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

119

}

120

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

121

void BreakableSingleLineToken::insertBreak(unsigned LineIndex,

122

unsigned TailOffset, Split Split,

123

bool InPPDirective,

124

WhitespaceManager &Whitespaces) {

125

Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,

126

Split.second, Postfix, Prefix, InPPDirective,

127

StartColumn);

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

128

}

129

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

130

/// \brief Creates a breakable token for \p Tok, whose text is expected to
/// start with \p Prefix and end with \p Postfix.
///
/// The text between prefix and postfix is stored as the token's single line
/// of content.
BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
                                                   unsigned StartColumn,
                                                   StringRef Prefix,
                                                   StringRef Postfix,
                                                   encoding::Encoding Encoding)
    : BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix),
      Postfix(Postfix) {
  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
  // Strip the decoration on both ends to obtain the content.
  const StringRef::size_type DecorationSize = Prefix.size() + Postfix.size();
  Line = Tok.TokenText.substr(Prefix.size(),
                              Tok.TokenText.size() - DecorationSize);
}

141

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

142

/// \brief Creates a breakable token for the string literal \p Tok.
///
/// A double quote is used as both the prefix and the postfix.
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                               unsigned StartColumn,
                                               encoding::Encoding Encoding)
    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

146

147

/// \brief Computes a split for the literal's content starting at byte
/// \p TailOffset so that it respects \p ColumnLimit.
///
/// \p LineIndex is not used. Delegates to getStringSplit().
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
  StringRef Remaining = Line.substr(TailOffset);
  // Two columns are reserved on top of the start column.
  // NOTE(review): presumably for the two quote characters - confirm.
  unsigned ContentStartColumn = StartColumn + 2;
  return getStringSplit(Remaining, ContentStartColumn, ColumnLimit, Encoding);
}

153

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

154

/// \brief Returns the known line comment prefix that \p Comment starts with,
/// or an empty string if it starts with none of them.
static StringRef getLineCommentPrefix(StringRef Comment) {
  // Ordered so that a longer prefix is tried before any shorter prefix it
  // starts with; the first match wins.
  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
  const size_t NumPrefixes = llvm::array_lengthof(KnownPrefixes);
  for (size_t Index = 0; Index != NumPrefixes; ++Index) {
    StringRef Candidate(KnownPrefixes[Index]);
    if (Comment.startswith(Candidate))
      return Candidate;
  }
  return StringRef("");
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

162

/// \brief Creates a breakable token for the line comment \p Token.
///
/// The prefix is whatever getLineCommentPrefix() detects at the start of the
/// token text; the postfix is empty.
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                           unsigned StartColumn,
                                           encoding::Encoding Encoding)
    : BreakableSingleLineToken(Token, StartColumn,
                               getLineCommentPrefix(Token.TokenText), "",
                               Encoding) {
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

168

169

/// \brief Computes a split for the comment's content starting at byte
/// \p TailOffset so that it respects \p ColumnLimit.
///
/// \p LineIndex is not used. Delegates to getCommentSplit().
BreakableToken::Split
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                               unsigned ColumnLimit) const {
  StringRef Remaining = Line.substr(TailOffset);
  // The content begins right after the comment prefix.
  return getCommentSplit(Remaining, StartColumn + Prefix.size(), ColumnLimit,
                         Encoding);
}

175

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

176

/// \brief Creates a breakable token for the block comment \p Token.
///
/// Splits the text between "/*" and "*/" into lines, normalizes the
/// whitespace between consecutive lines, decides whether continuation lines
/// are decorated with "* ", and computes the column of each line as well as
/// the indent used when a line is broken.
///
/// \param Style Forwarded to adjustWhitespace().
/// \param Token The comment token; its text must start with "/*" and end
/// with "*/".
/// \param StartColumn Column at which the comment will start.
/// \param OriginalStartColumn Column at which the comment started originally;
/// the difference to \p StartColumn is applied to all following lines.
/// \param FirstInLine If false and the comment has a single line, no star
/// decoration is used.
/// \param Encoding Forwarded to the BreakableToken base class.
BreakableBlockComment::BreakableBlockComment(
    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
    : BreakableToken(Token, Encoding) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  // Everything between the comment markers, one entry per line.
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");

  const size_t NumLines = Lines.size();
  LeadingWhitespace.resize(NumLines);
  StartOfLineColumn.resize(NumLines);

  int IndentDelta = StartColumn - OriginalStartColumn;

  // A single-line comment that is not first in its line can start at an
  // arbitrary column, and the remaining horizontal space may be too small to
  // align continuation lines with the first one.
  // FIXME: We could, probably, align them to current indentation level, but
  // now we just wrap them without stars.
  bool NeedsStar = FirstInLine || NumLines != 1;

  // The content of the first line starts after the "/*".
  StartOfLineColumn[0] = StartColumn + 2;

  // Pass 1: normalize whitespace for every line and find out whether all
  // following lines are decorated with a star.
  for (size_t LineNo = 1; LineNo < NumLines; ++LineNo) {
    adjustWhitespace(Style, LineNo, IndentDelta);
    if (!NeedsStar)
      continue;
    if (Lines[LineNo].empty()) {
      // An empty line is only acceptable as the last line, where the closing
      // "*/" provides the star.
      NeedsStar = LineNo + 1 == NumLines;
    } else {
      NeedsStar = Lines[LineNo][0] == '*';
    }
  }

  if (NeedsStar)
    Decoration = "* ";
  else
    Decoration = "";

  // Pass 2: strip the decoration from the lines and determine the indent
  // used at line breaks.
  IndentAtLineBreak = StartOfLineColumn[0] + 1;
  for (size_t LineNo = 1; LineNo < NumLines; ++LineNo) {
    if (Lines[LineNo].empty()) {
      // Empty lines other than the last one (which always ends in "*/") get
      // start column 0 when undecorated, so that no trailing whitespace is
      // inserted.
      const bool IsLastLine = LineNo + 1 == NumLines;
      if (!NeedsStar && !IsLastLine)
        StartOfLineColumn[LineNo] = 0;
      continue;
    }
    if (NeedsStar) {
      // The first line already excludes the star. Here the star and
      // (optionally) the space following it are removed from the content.
      int Offset = Lines[LineNo].startswith("* ") ? 2 : 1;
      StartOfLineColumn[LineNo] += Offset;
      Lines[LineNo] = Lines[LineNo].substr(Offset);
      LeadingWhitespace[LineNo] += Offset;
    }
    IndentAtLineBreak =
        std::min<int>(IndentAtLineBreak, StartOfLineColumn[LineNo]);
  }
  // The indent has to leave room for the decoration.
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());

  DEBUG({
    for (size_t i = 0; i < Lines.size(); ++i) {
      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
                   << "\n";
    }
  });
}

void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,

237

unsigned LineIndex,

238

int IndentDelta) {

239

// Calculate the end of the non-whitespace text in the previous line.

240

size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");

241

if (EndOfPreviousLine == StringRef::npos)

242

EndOfPreviousLine = 0;

243

else

244

++EndOfPreviousLine;

245

// Calculate the start of the non-whitespace text in the current line.

246

size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");

247

if (StartOfLine == StringRef::npos)

248

StartOfLine = Lines[LineIndex].size();

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

249

250

// Adjust Lines to only contain relevant text.

251

Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);

252

Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);

253

// Adjust LeadingWhitespace to account all whitespace between the lines

254

// to the current line.

255

LeadingWhitespace[LineIndex] =

256

Lines[LineIndex].begin() - Lines[LineIndex - 1].end();

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

257

258

// FIXME: We currently count tabs as 1 character. To solve this, we need to

259

// get the correct indentation width of the start of the comment, which

260

// requires correct counting of the tab expansions before the comment, and

261

// a configurable tab width. Since the current implementation only breaks

262

// if leading tabs are intermixed with spaces, that is not a high priority.

263

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

264

// Adjust the start column uniformly accross all lines.

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

265

StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

266

}

267

268

/// \brief Returns the number of lines the comment was split into.
unsigned BreakableBlockComment::getLineCount() const {
  return Lines.size();
}

269

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

270

unsigned BreakableBlockComment::getLineLengthAfterSplit(

271

unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {

272

return getContentStartColumn(LineIndex, Offset) +

273

encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

274

Encoding) +

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

275

// The last line gets a "*/" postfix.

276

(LineIndex + 1 == Lines.size() ? 2 : 0);

277

}

278

279

/// \brief Computes a split for line \p LineIndex, starting at byte
/// \p TailOffset, so that it respects \p ColumnLimit.
///
/// Delegates to getCommentSplit().
BreakableToken::Split
BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
  StringRef Remaining = Lines[LineIndex].substr(TailOffset);
  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
  return getCommentSplit(Remaining, ContentStartColumn, ColumnLimit, Encoding);
}

286

287

void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

288

Split Split, bool InPPDirective,

289

WhitespaceManager &Whitespaces) {

290

StringRef Text = Lines[LineIndex].substr(TailOffset);

291

StringRef Prefix = Decoration;

292

if (LineIndex + 1 == Lines.size() &&

293

Text.size() == Split.first + Split.second) {

294

// For the last line we need to break before "*/", but not to add "* ".

Prefix = "";

}

unsigned BreakOffsetInToken =

299

Text.data() - Tok.TokenText.data() + Split.first;

300

unsigned CharsToRemove = Split.second;

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

301

assert(IndentAtLineBreak >= Decoration.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

302

Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,

303

InPPDirective, IndentAtLineBreak - Decoration.size());

}

void

BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,

308

unsigned InPPDirective,

309

WhitespaceManager &Whitespaces) {

310

if (LineIndex == 0)

311

return;

312

StringRef Prefix = Decoration;

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

313

if (Lines[LineIndex].empty()) {

314

if (LineIndex + 1 == Lines.size()) {

315

// If the last line is empty, we don't need a prefix, as the */ will line

316

// up with the decoration (if it exists).

317

Prefix = "";

318

} else if (!Decoration.empty()) {

319

// For other empty lines, if we do have a decoration, adapt it to not

320

// contain a trailing whitespace.

321

Prefix = Prefix.substr(0, 1);

322

}

Daniel Jasper

e2c482f

2013-05-30 06:40:07 +0000

[diff] [blame]

323

} else {

324

if (StartOfLineColumn[LineIndex] == 1) {

325

// This lines starts immediately after the decorating *.

326

Prefix = Prefix.substr(0, 1);

327

}

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

328

}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

329

330

unsigned WhitespaceOffsetInToken =

331

Lines[LineIndex].data() - Tok.TokenText.data() -

332

LeadingWhitespace[LineIndex];

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

333

assert(StartOfLineColumn[LineIndex] >= Prefix.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

334

Whitespaces.breakToken(

335

Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,

336

InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());

}

unsigned

BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

341

unsigned TailOffset) const {

342

// If we break, we always break at the predefined indent.

343

if (TailOffset != 0)

344

return IndentAtLineBreak;

345

return StartOfLineColumn[LineIndex];

346

}

347

Alexander Kornienko