Blame - lib/Format/BreakableToken.cpp - fp2-dev/platform/external/clang

2013-04-15 14:28:00 +0000

[diff] [blame]

1

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

///

10

/// \file

11

/// \brief Contains implementation of BreakableToken class and classes derived

12

/// from it.

13

///

14

//===----------------------------------------------------------------------===//

15

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

16

#define DEBUG_TYPE "format-token-breaker"

17

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

18

#include "BreakableToken.h"

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

19

#include "clang/Basic/CharInfo.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

20

#include "clang/Format/Format.h"

Alexander Kornienko

2013-04-17 17:34:05 +0000

[diff] [blame]

21

#include "llvm/ADT/STLExtras.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

22

#include "llvm/Support/Debug.h"

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

#include <algorithm>

namespace clang {

namespace format {

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

27

namespace {

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

28

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

29

/// \brief Finds a place at which the comment text \p Text can be broken so
/// that the part before the break stays within \p ColumnLimit.
///
/// \param Text               Comment content to split.
/// \param ContentStartColumn Column at which \p Text starts.
/// \param ColumnLimit        Column limit the first part should respect.
/// \param Encoding           Encoding used to map code points to bytes.
///
/// \returns Split(StringRef::npos, 0) if no suitable break was found.
/// Otherwise the first member is the size of the text before the break
/// (right-trimmed) and the second member is the distance between the end of
/// that text and the start of the left-trimmed remainder.
BreakableToken::Split getCommentSplit(StringRef Text,
                                      unsigned ContentStartColumn,
                                      unsigned ColumnLimit,
                                      encoding::Encoding Encoding) {
  const BreakableToken::Split NoSplit(StringRef::npos, 0);
  if (ColumnLimit <= ContentStartColumn + 1)
    return NoSplit;

  // Convert the budget expressed in code points into a byte offset, since
  // the StringRef search functions below operate on bytes.
  const unsigned CodePointBudget = ColumnLimit - ContentStartColumn + 1;
  unsigned ByteBudget = 0;
  unsigned CodePointsSeen = 0;
  while (CodePointsSeen < CodePointBudget && ByteBudget < Text.size()) {
    ByteBudget += encoding::getCodePointNumBytes(Text[ByteBudget], Encoding);
    ++CodePointsSeen;
  }

  // Prefer the last space at or before the byte budget.
  StringRef::size_type BreakAt = Text.rfind(' ', ByteBudget);

  // Don't break at leading whitespace: that is the case when there is no
  // space at all, or when everything up to the space found is spaces too.
  const bool InLeadingBlanks =
      BreakAt == StringRef::npos ||
      Text.find_last_not_of(' ', BreakAt) == StringRef::npos;
  if (InLeadingBlanks) {
    // Make sure that we don't break at leading whitespace that reaches past
    // the budget: search forward from whichever comes later, the budget or
    // the first non-space character.
    const StringRef::size_type FirstContent = Text.find_first_not_of(" ");
    if (FirstContent == StringRef::npos)
      // If the comment is only whitespace, we cannot split.
      return NoSplit;
    BreakAt = Text.find(' ', std::max<unsigned>(ByteBudget, FirstContent));
  }

  if (BreakAt == StringRef::npos || BreakAt == 0)
    return NoSplit;

  const StringRef Head = Text.substr(0, BreakAt).rtrim();
  const StringRef Tail = Text.substr(BreakAt).ltrim();
  return BreakableToken::Split(Head.size(), Tail.begin() - Head.end());
}

65

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

66

/// \brief Finds a place at which the string literal content \p Text can be
/// broken so that the first part fits between \p ContentStartColumn and
/// \p ColumnLimit.
///
/// The text is scanned unit by unit, where a unit is either a whole escape
/// sequence or a single code point, so a break never lands inside one of
/// them. A break after the last space seen is preferred, then a break after
/// the last '/' seen, then a break at the point where scanning stopped.
///
/// \returns Split(StringRef::npos, 0) if \p Text is empty, if there is no
/// room, or if no unit fits; otherwise a split whose second member is 0.
BreakableToken::Split getStringSplit(StringRef Text,
                                     unsigned ContentStartColumn,
                                     unsigned ColumnLimit,
                                     encoding::Encoding Encoding) {
  // FIXME: Reduce unit test case.
  const BreakableToken::Split NoSplit(StringRef::npos, 0);
  if (Text.empty() || ColumnLimit <= ContentStartColumn)
    return NoSplit;

  // Never consume the whole text: cap the budget at one less than the number
  // of code points in it.
  const unsigned MaxSplit =
      std::min<unsigned>(ColumnLimit - ContentStartColumn,
                         encoding::getCodePointCount(Text, Encoding) - 1);

  StringRef::size_type LastSpace = 0;
  StringRef::size_type LastSlash = 0;
  StringRef::size_type Consumed = 0;
  unsigned Chars = 0;
  while (true) {
    const char Lead = Text[0];
    unsigned Advance;
    if (Lead == '\\') {
      // An escape sequence is kept together; its length is added to the
      // character count as is.
      Advance = encoding::getEscapeSequenceLength(Text);
      Chars += Advance;
    } else {
      Advance = encoding::getCodePointNumBytes(Lead, Encoding);
      Chars += 1;
    }

    if (Chars > MaxSplit)
      break;

    // Remember the byte offsets of the most recent candidates.
    if (Lead == ' ')
      LastSpace = Consumed;
    else if (Lead == '/')
      LastSlash = Consumed;

    Consumed += Advance;
    Text = Text.substr(Advance);
  }

  // An offset of 0 doubles as "not found" for all three candidates.
  if (LastSpace != 0)
    return BreakableToken::Split(LastSpace + 1, 0);
  if (LastSlash != 0)
    return BreakableToken::Split(LastSlash + 1, 0);
  if (Consumed != 0)
    return BreakableToken::Split(Consumed, 0);
  return NoSplit;
}

111

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

112

} // namespace

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

113

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

114

/// \brief Reports the number of lines in this token, which is always 1 for a
/// single-line token.
unsigned BreakableSingleLineToken::getLineCount() const {
  return 1;
}

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

115

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

116

unsigned BreakableSingleLineToken::getLineLengthAfterSplit(

117

unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

118

return StartColumn + Prefix.size() + Postfix.size() +

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

119

encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

120

}

121

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

122

/// \brief Sets up a single-line breakable token.
///
/// Stores the start column, prefix and postfix, and sets \c Line to the
/// token text with the prefix and postfix stripped. The token text is
/// asserted to start with \p Prefix and end with \p Postfix.
BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
                                                   unsigned StartColumn,
                                                   StringRef Prefix,
                                                   StringRef Postfix,
                                                   encoding::Encoding Encoding)
    : BreakableToken(Tok, Encoding),
      StartColumn(StartColumn),
      Prefix(Prefix),
      Postfix(Postfix) {
  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));

  // Whatever lies between the prefix and the postfix is the content.
  const StringRef::size_type ContentBegin = Prefix.size();
  const StringRef::size_type ContentSize =
      Tok.TokenText.size() - Prefix.size() - Postfix.size();
  Line = Tok.TokenText.substr(ContentBegin, ContentSize);
}

133

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

134

/// \brief Sets up a breakable string literal: a single-line token whose
/// prefix and postfix are both a double quote.
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                               unsigned StartColumn,
                                               encoding::Encoding Encoding)
    : BreakableSingleLineToken(Tok, StartColumn, /*Prefix=*/"\"",
                               /*Postfix=*/"\"", Encoding) {
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

138

139

/// \brief Finds a split for the part of the literal that starts at byte
/// \p TailOffset, delegating to getStringSplit().
///
/// The content start column handed over is StartColumn + 2. \p LineIndex is
/// not used by this implementation.
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
  const unsigned ContentStartColumn = StartColumn + 2;
  return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
                        ColumnLimit, Encoding);
}

145

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

146

void BreakableStringLiteral::insertBreak(unsigned LineIndex,

147

unsigned TailOffset, Split Split,

148

bool InPPDirective,

149

WhitespaceManager &Whitespaces) {

150

Whitespaces.replaceWhitespaceInToken(

151

Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,

152

Prefix, InPPDirective, 1, StartColumn);

153

}

154

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

155

/// \brief Returns the line comment marker that \p Comment starts with.
///
/// Candidates are tried in order, longer ones before their shorter variants,
/// so "/// " wins over "///" and "// " wins over "//". Returns an empty
/// string if \p Comment starts with none of them.
static StringRef getLineCommentPrefix(StringRef Comment) {
  const char *Candidates[] = { "/// ", "///", "// ", "//" };
  const char **const End = Candidates + llvm::array_lengthof(Candidates);
  for (const char **It = Candidates; It != End; ++It) {
    if (Comment.startswith(*It))
      return *It;
  }
  return "";
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

163

/// \brief Sets up a breakable line comment.
///
/// The prefix is whatever getLineCommentPrefix() detects in the token text
/// and the postfix is empty. The detected prefix is saved in
/// \c OriginalPrefix. If the character directly after the prefix is
/// alphanumeric, a "//" or "///" prefix is extended with a trailing space.
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                           unsigned StartColumn,
                                           encoding::Encoding Encoding)
    : BreakableSingleLineToken(Token, StartColumn,
                               getLineCommentPrefix(Token.TokenText), "",
                               Encoding) {
  OriginalPrefix = Prefix;

  // Nothing follows the prefix, so there is nothing to separate it from.
  if (Token.TokenText.size() <= Prefix.size())
    return;
  // Only text starting with a letter or digit gets the extra space.
  if (!isAlphanumeric(Token.TokenText[Prefix.size()]))
    return;

  if (Prefix == "//")
    Prefix = "// ";
  else if (Prefix == "///")
    Prefix = "/// ";
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

178

179

/// \brief Finds a split for the part of the comment that starts at byte
/// \p TailOffset, delegating to getCommentSplit().
///
/// The content start column handed over is the start column plus the size
/// of the (possibly extended) prefix. \p LineIndex is not used by this
/// implementation.
BreakableToken::Split
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                               unsigned ColumnLimit) const {
  const unsigned ContentStartColumn = StartColumn + Prefix.size();
  return getCommentSplit(Line.substr(TailOffset), ContentStartColumn,
                         ColumnLimit, Encoding);
}

185

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

186

void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

187

Split Split, bool InPPDirective,

188

WhitespaceManager &Whitespaces) {

189

Whitespaces.replaceWhitespaceInToken(

190

Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,

191

Postfix, Prefix, InPPDirective, 1, StartColumn);

}

void

BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,

196

unsigned InPPDirective,

197

WhitespaceManager &Whitespaces) {

198

if (OriginalPrefix != Prefix) {

199

Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",

false, 0, 1);

}

}

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

204

/// \brief Sets up a breakable block comment.
///
/// Splits the text between "/*" and "*/" into \c Lines, then computes for
/// every line its leading whitespace and start column, decides whether
/// continuation lines are decorated with "* ", and determines the column
/// (\c IndentAtLineBreak) used for text wrapped onto a new line.
BreakableBlockComment::BreakableBlockComment(
    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
    : BreakableToken(Token, Encoding) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");

  const size_t LineCount = Lines.size();
  const int IndentDelta = StartColumn - OriginalStartColumn;
  LeadingWhitespace.resize(LineCount);
  StartOfLineColumn.resize(LineCount);
  // The content of the first line starts right after the "/*".
  StartOfLineColumn[0] = StartColumn + 2;

  // Comments for which FirstInLine is false can start on arbitrary column,
  // and available horizontal space can be too small to align consecutive
  // lines with the first one.
  // FIXME: We could, probably, align them to current indentation level, but
  // now we just wrap them without stars.
  bool NeedsStar = LineCount != 1 || FirstInLine;

  // First pass: normalize the whitespace around every line break and check
  // whether all continuation lines start with a star.
  for (size_t Index = 1; Index < LineCount; ++Index) {
    adjustWhitespace(Style, Index, IndentDelta);
    if (!NeedsStar)
      continue;
    // If the last line is empty, the closing "*/" will have a star; any
    // other empty line rules the decoration out.
    NeedsStar = Lines[Index].empty() ? Index + 1 == LineCount
                                     : Lines[Index][0] == '*';
  }

  Decoration = NeedsStar ? "* " : "";
  IndentAtLineBreak = StartOfLineColumn[0] + 1;

  // Second pass: strip the decoration from the lines and find the smallest
  // start column among the non-empty continuation lines.
  for (size_t Index = 1; Index < LineCount; ++Index) {
    if (Lines[Index].empty()) {
      // For all but the last line (which always ends in */), set the start
      // column to 0 if they're empty, so we do not insert trailing
      // whitespace anywhere.
      const bool IsLastLine = Index + 1 == LineCount;
      if (!NeedsStar && !IsLastLine)
        StartOfLineColumn[Index] = 0;
      continue;
    }
    if (NeedsStar) {
      // The first line already excludes the star. For all other lines,
      // adjust the line to exclude the star and (optionally) the first
      // whitespace.
      const int Skip = Lines[Index].startswith("* ") ? 2 : 1;
      StartOfLineColumn[Index] += Skip;
      LeadingWhitespace[Index] += Skip;
      Lines[Index] = Lines[Index].substr(Skip);
    }
    IndentAtLineBreak =
        std::min<int>(IndentAtLineBreak, StartOfLineColumn[Index]);
  }

  // The indent must leave enough room for the decoration itself.
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());

  DEBUG({
    for (size_t Index = 0; Index < Lines.size(); ++Index) {
      llvm::dbgs() << Index << " |" << Lines[Index] << "| "
                   << LeadingWhitespace[Index] << "\n";
    }
  });
}

void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,

265

unsigned LineIndex,

266

int IndentDelta) {

267

// Calculate the end of the non-whitespace text in the previous line.

268

size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");

269

if (EndOfPreviousLine == StringRef::npos)

270

EndOfPreviousLine = 0;

271

else

272

++EndOfPreviousLine;

273

// Calculate the start of the non-whitespace text in the current line.

274

size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");

275

if (StartOfLine == StringRef::npos)

276

StartOfLine = Lines[LineIndex].size();

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

277

278

// Adjust Lines to only contain relevant text.

279

Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);

280

Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);

281

// Adjust LeadingWhitespace to account all whitespace between the lines

282

// to the current line.

283

LeadingWhitespace[LineIndex] =

284

Lines[LineIndex].begin() - Lines[LineIndex - 1].end();

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

285

286

// FIXME: We currently count tabs as 1 character. To solve this, we need to

287

// get the correct indentation width of the start of the comment, which

288

// requires correct counting of the tab expansions before the comment, and

289

// a configurable tab width. Since the current implementation only breaks

290

// if leading tabs are intermixed with spaces, that is not a high priority.

291

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

292

// Adjust the start column uniformly accross all lines.

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

293

StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

294

}

295

296

/// \brief Reports the number of lines the block comment was split into.
unsigned BreakableBlockComment::getLineCount() const {
  return Lines.size();
}

297

Alexander Kornienko

2785b9a

2013-06-07 16:02:52 +0000

[diff] [blame]

298

unsigned BreakableBlockComment::getLineLengthAfterSplit(

299

unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {

300

return getContentStartColumn(LineIndex, Offset) +

301

encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),

Alexander Kornienko

2013-06-05 14:09:10 +0000

[diff] [blame]

302

Encoding) +

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

303

// The last line gets a "*/" postfix.

304

(LineIndex + 1 == Lines.size() ? 2 : 0);

305

}

306

307

/// \brief Finds a split for the part of line \p LineIndex that starts at
/// byte \p TailOffset, delegating to getCommentSplit().
BreakableToken::Split
BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
  const unsigned ContentStartColumn =
      getContentStartColumn(LineIndex, TailOffset);
  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
                         ContentStartColumn, ColumnLimit, Encoding);
}

314

315

void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

316

Split Split, bool InPPDirective,

317

WhitespaceManager &Whitespaces) {

318

StringRef Text = Lines[LineIndex].substr(TailOffset);

319

StringRef Prefix = Decoration;

320

if (LineIndex + 1 == Lines.size() &&

321

Text.size() == Split.first + Split.second) {

322

// For the last line we need to break before "*/", but not to add "* ".

Prefix = "";

}

unsigned BreakOffsetInToken =

327

Text.data() - Tok.TokenText.data() + Split.first;

328

unsigned CharsToRemove = Split.second;

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

329

assert(IndentAtLineBreak >= Decoration.size());

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

330

Whitespaces.replaceWhitespaceInToken(Tok, BreakOffsetInToken, CharsToRemove,

331

"", Prefix, InPPDirective, 1,

332

IndentAtLineBreak - Decoration.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

}

void

BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,

337

unsigned InPPDirective,

338

WhitespaceManager &Whitespaces) {

339

if (LineIndex == 0)

340

return;

341

StringRef Prefix = Decoration;

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

342

if (Lines[LineIndex].empty()) {

343

if (LineIndex + 1 == Lines.size()) {

344

// If the last line is empty, we don't need a prefix, as the */ will line

345

// up with the decoration (if it exists).

346

Prefix = "";

347

} else if (!Decoration.empty()) {

348

// For other empty lines, if we do have a decoration, adapt it to not

349

// contain a trailing whitespace.

350

Prefix = Prefix.substr(0, 1);

351

}

Daniel Jasper

e2c482f

2013-05-30 06:40:07 +0000

[diff] [blame]

352

} else {

353

if (StartOfLineColumn[LineIndex] == 1) {

354

// This lines starts immediately after the decorating *.

355

Prefix = Prefix.substr(0, 1);

356

}

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

357

}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

358

359

unsigned WhitespaceOffsetInToken =

360

Lines[LineIndex].data() - Tok.TokenText.data() -

361

LeadingWhitespace[LineIndex];

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

362

assert(StartOfLineColumn[LineIndex] >= Prefix.size());

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

363

Whitespaces.replaceWhitespaceInToken(

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

364

Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,

Alexander Kornienko

2013-06-11 16:01:49 +0000

[diff] [blame^]

365

InPPDirective, 1, StartOfLineColumn[LineIndex] - Prefix.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

}

unsigned

BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

370

unsigned TailOffset) const {

371

// If we break, we always break at the predefined indent.

372

if (TailOffset != 0)

373

return IndentAtLineBreak;

374

return StartOfLineColumn[LineIndex];

375

}

376

Alexander Kornienko