Blame - lib/Format/BreakableToken.cpp - fp2-dev/platform/external/clang

2013-04-15 14:28:00 +0000

[diff] [blame]

1

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//

2

//

3

// The LLVM Compiler Infrastructure

4

//

5

// This file is distributed under the University of Illinois Open Source

6

// License. See LICENSE.TXT for details.

7

//

8

//===----------------------------------------------------------------------===//

9

///

10

/// \file

11

/// \brief Contains implementation of BreakableToken class and classes derived

12

/// from it.

13

///

14

//===----------------------------------------------------------------------===//

15

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

16

#define DEBUG_TYPE "format-token-breaker"

17

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

18

#include "BreakableToken.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

19

#include "clang/Format/Format.h"

Alexander Kornienko

2013-04-17 17:34:05 +0000

[diff] [blame]

20

#include "llvm/ADT/STLExtras.h"

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

21

#include "llvm/Support/Debug.h"

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

#include <algorithm>

namespace clang {

namespace format {

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

26

namespace {

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

27

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

28

// FIXME: Move helper string functions to where it makes sense.

29

30

/// \brief Returns the length of the octal escape sequence that starts at the
/// beginning of \p Text.
///
/// The first character of \p Text is counted but never inspected; at most
/// three octal digits following it are counted, so the result lies in [1, 4].
unsigned getOctalLength(StringRef Text) {
  const unsigned MaxLength = 4; // First character plus up to three digits.
  unsigned Length = 1;
  for (; Length < Text.size() && Length < MaxLength; ++Length) {
    const char C = Text[Length];
    if (C < '0' || C > '7')
      break;
  }
  return Length;
}

/// \brief Returns the length of the hexadecimal escape sequence that starts
/// at the beginning of \p Text.
///
/// The first two characters (the '\x' introducer) are counted without being
/// inspected; every hexadecimal digit that directly follows is counted too.
unsigned getHexLength(StringRef Text) {
  unsigned Length = 2; // Start right after '\x'.
  for (; Length < Text.size(); ++Length) {
    const char C = Text[Length];
    const bool IsDecimalDigit = C >= '0' && C <= '9';
    const bool IsLowerHexDigit = C >= 'a' && C <= 'f';
    const bool IsUpperHexDigit = C >= 'A' && C <= 'F';
    if (!IsDecimalDigit && !IsLowerHexDigit && !IsUpperHexDigit)
      break;
  }
  return Length;
}

/// \brief Returns the number of characters occupied by the escape sequence
/// at the beginning of \p Text.
///
/// \p Text must start with a backslash. A lone backslash has length 1.
/// '\u' and '\U' sequences are reported with their fixed lengths (6 and 10)
/// regardless of how much text actually remains; '\x' and octal sequences are
/// measured by getHexLength() and getOctalLength(); anything else is treated
/// as a two-character escape.
unsigned getEscapeSequenceLength(StringRef Text) {
  assert(Text[0] == '\\');
  if (Text.size() < 2)
    return 1;

  const char Kind = Text[1];
  if (Kind == 'u')
    return 6;
  if (Kind == 'U')
    return 10;
  if (Kind == 'x')
    return getHexLength(Text);
  if (Kind >= '0' && Kind <= '7')
    return getOctalLength(Text);
  return 2;
}

/// \brief Returns the offset at which the character containing \p Offset
/// starts.
///
/// Escape sequences are walked from the start of \p Text. If \p Offset falls
/// inside one of them, the offset of that sequence's backslash is returned;
/// otherwise \p Offset itself is returned.
StringRef::size_type getStartOfCharacter(StringRef Text,
                                         StringRef::size_type Offset) {
  for (StringRef::size_type Escape = Text.find('\\');
       Escape != StringRef::npos && Escape < Offset;) {
    const StringRef::size_type EscapeEnd =
        Escape + getEscapeSequenceLength(Text.substr(Escape));
    // Offset points into the middle of this escape sequence.
    if (Offset < EscapeEnd)
      return Escape;
    Escape = Text.find('\\', EscapeEnd);
  }
  return Offset;
}

/// \brief Computes where to break the comment text \p Text so that it fits
/// into \p ColumnLimit when its content starts at \p ContentStartColumn.
///
/// \returns A split whose first member is the length of the text kept before
/// the break (trailing whitespace trimmed) and whose second member is the
/// number of whitespace characters between the two parts. A first member of
/// StringRef::npos means that no split is possible.
BreakableToken::Split getCommentSplit(StringRef Text,
                                      unsigned ContentStartColumn,
                                      unsigned ColumnLimit) {
  const BreakableToken::Split NoSplit(StringRef::npos, 0);
  if (ColumnLimit <= ContentStartColumn + 1)
    return NoSplit;

  const unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
  StringRef::size_type BreakOffset = Text.rfind(' ', MaxSplit);

  // Don't break at leading whitespace: if there is no space to break at, or
  // everything up to the space found is itself whitespace, look forward
  // instead.
  const bool OnlyLeadingWhitespace =
      BreakOffset == StringRef::npos ||
      Text.find_last_not_of(' ', BreakOffset) == StringRef::npos;
  if (OnlyLeadingWhitespace) {
    // Make sure that we don't break at leading whitespace that reaches past
    // MaxSplit.
    const StringRef::size_type FirstContent = Text.find_first_not_of(" ");
    // If the comment is only whitespace, we cannot split.
    if (FirstContent == StringRef::npos)
      return NoSplit;
    BreakOffset = Text.find(' ', std::max<unsigned>(MaxSplit, FirstContent));
  }

  if (BreakOffset == StringRef::npos || BreakOffset == 0)
    return NoSplit;

  const StringRef Head = Text.substr(0, BreakOffset).rtrim();
  const StringRef Tail = Text.substr(BreakOffset).ltrim();
  return BreakableToken::Split(Head.size(), Tail.begin() - Head.end());
}

108

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

109

/// \brief Computes where to break the string literal content \p Text so that
/// it fits into \p ColumnLimit when it starts at \p ContentStartColumn.
///
/// The break is placed, in order of preference, directly after a space found
/// by searching backwards from the maximum split offset, directly after a
/// '/' found the same way, or at the start of the (possibly escaped)
/// character at the maximum split offset. The second member of the returned
/// split is always 0.
///
/// \returns A split whose first member is StringRef::npos if no split is
/// possible.
BreakableToken::Split getStringSplit(StringRef Text,
                                     unsigned ContentStartColumn,
                                     unsigned ColumnLimit) {
  const BreakableToken::Split NoSplit(StringRef::npos, 0);

  // FIXME: Reduce unit test case.
  if (Text.empty())
    return NoSplit;
  if (ColumnLimit <= ContentStartColumn)
    return NoSplit;

  const unsigned MaxSplit =
      std::min<unsigned>(ColumnLimit - ContentStartColumn, Text.size() - 1);

  // Preferred break characters, in priority order. A match at offset 0 is
  // ignored.
  const char BreakChars[] = { ' ', '/' };
  for (size_t I = 0; I != sizeof(BreakChars); ++I) {
    const StringRef::size_type Pos = Text.rfind(BreakChars[I], MaxSplit);
    if (Pos != StringRef::npos && Pos != 0)
      return BreakableToken::Split(Pos + 1, 0);
  }

  // Fall back to breaking at MaxSplit, without cutting an escape sequence.
  const StringRef::size_type CharacterStart =
      getStartOfCharacter(Text, MaxSplit);
  if (CharacterStart == StringRef::npos || CharacterStart == 0)
    return NoSplit;
  return BreakableToken::Split(CharacterStart, 0);
}

131

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

132

} // namespace

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

133

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

134

/// \brief A single-line token always consists of exactly one line.
unsigned BreakableSingleLineToken::getLineCount() const {
  return 1;
}

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

135

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

136

unsigned

137

BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,

138

unsigned TailOffset) const {

139

return StartColumn + Prefix.size() + Postfix.size() + Line.size() -

140

TailOffset;

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

141

}

142

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

143

void BreakableSingleLineToken::insertBreak(unsigned LineIndex,

144

unsigned TailOffset, Split Split,

145

bool InPPDirective,

146

WhitespaceManager &Whitespaces) {

147

Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,

148

Split.second, Postfix, Prefix, InPPDirective,

149

StartColumn);

Alexander Kornienko

2013-04-15 14:28:00 +0000

[diff] [blame]

150

}

151

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

152

/// \brief Creates a breakable token for \p Tok, whose text must start with
/// \p Prefix and end with \p Postfix.
///
/// Line is set to the token text between the prefix and the postfix.
BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
                                                   unsigned StartColumn,
                                                   StringRef Prefix,
                                                   StringRef Postfix)
    : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
      Postfix(Postfix) {
  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
  const StringRef Text(Tok.TokenText);
  const size_t ContentLength = Text.size() - Prefix.size() - Postfix.size();
  Line = Text.substr(Prefix.size(), ContentLength);
}

162

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

163

/// \brief Creates a breakable string literal; both the prefix and the
/// postfix of the underlying single-line token are a double quote.
BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                               unsigned StartColumn)
    : BreakableSingleLineToken(Tok, StartColumn, /*Prefix=*/"\"",
                               /*Postfix=*/"\"") {
}

166

167

/// \brief Computes a split for the part of the literal after \p TailOffset
/// by delegating to getStringSplit(). \p LineIndex is not used.
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
  const StringRef Remaining = Line.substr(TailOffset);
  // NOTE(review): the extra 2 presumably accounts for the two quote
  // characters surrounding the content - confirm.
  const unsigned ContentStartColumn = StartColumn + 2;
  return getStringSplit(Remaining, ContentStartColumn, ColumnLimit);
}

172

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

173

/// \brief Returns the known line comment prefix that \p Comment starts with,
/// or an empty string if it starts with none of them.
///
/// Candidates are tried in order, so a longer prefix wins over a shorter
/// prefix that it begins with (e.g. "/// " over "///" and "//").
static StringRef getLineCommentPrefix(StringRef Comment) {
  const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" };
  const size_t NumPrefixes = llvm::array_lengthof(KnownPrefixes);
  for (size_t Index = 0; Index < NumPrefixes; ++Index) {
    const char *const Candidate = KnownPrefixes[Index];
    if (Comment.startswith(Candidate))
      return Candidate;
  }
  return "";
}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

181

/// \brief Creates a breakable line comment. The prefix is whatever
/// getLineCommentPrefix() detects in the token text; the postfix is empty.
BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                           unsigned StartColumn)
    : BreakableSingleLineToken(Token, StartColumn,
                               /*Prefix=*/getLineCommentPrefix(Token.TokenText),
                               /*Postfix=*/"") {
}

185

186

/// \brief Computes a split for the part of the comment after \p TailOffset
/// by delegating to getCommentSplit(); the content is taken to start right
/// after the comment prefix. \p LineIndex is not used.
BreakableToken::Split
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                               unsigned ColumnLimit) const {
  const StringRef Remaining = Line.substr(TailOffset);
  const unsigned ContentStartColumn = StartColumn + Prefix.size();
  return getCommentSplit(Remaining, ContentStartColumn, ColumnLimit);
}

/// \brief Splits the block comment \p Token into lines and works out how
/// each line is indented and decorated.
///
/// The text between "/*" and "*/" is split at newlines into Lines. For every
/// line but the first, adjustWhitespace() strips the surrounding whitespace
/// and records the line's leading whitespace and start column, shifted by
/// the difference between \p StartColumn and \p OriginalStartColumn.
/// Decoration becomes "* " only if every non-empty continuation line starts
/// with '*' and only the last line may be empty; otherwise it is empty.
/// IndentAtLineBreak ends up as the smallest content start column of the
/// non-empty continuation lines, capped by the first line's content column
/// plus one.
///
/// \p Style is only forwarded to adjustWhitespace().
BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
                                             const FormatToken &Token,
                                             unsigned StartColumn,
                                             unsigned OriginalStartColumn,
                                             bool FirstInLine)
    : BreakableToken(Token) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");

  const size_t NumLines = Lines.size();
  const int IndentDelta = StartColumn - OriginalStartColumn;
  LeadingWhitespace.resize(NumLines);
  StartOfLineColumn.resize(NumLines);
  StartOfLineColumn[0] = StartColumn + 2;

  // Comments for which FirstInLine is false can start on arbitrary column,
  // and available horizontal space can be too small to align consecutive
  // lines with the first one.
  // FIXME: We could, probably, align them to current indentation level, but
  // now we just wrap them without stars.
  bool NeedsStar = FirstInLine || NumLines != 1;

  // The star check has to look at each line right after its own whitespace
  // adjustment, before the adjustment of the following line trims its tail.
  for (size_t I = 1; I < NumLines; ++I) {
    adjustWhitespace(Style, I, IndentDelta);
    if (!NeedsStar)
      continue;
    // If the last line is empty, the closing "*/" will have a star.
    const bool IsLastLine = I + 1 == NumLines;
    NeedsStar = Lines[I].empty() ? IsLastLine : Lines[I][0] == '*';
  }

  Decoration = NeedsStar ? "* " : "";
  IndentAtLineBreak = StartOfLineColumn[0] + 1;
  for (size_t I = 1; I < NumLines; ++I) {
    if (Lines[I].empty()) {
      // For all but the last line (which always ends in */), set the start
      // column to 0 if they're empty, so we do not insert trailing
      // whitespace anywhere.
      const bool IsLastLine = I + 1 == NumLines;
      if (!NeedsStar && !IsLastLine)
        StartOfLineColumn[I] = 0;
      continue;
    }
    if (NeedsStar) {
      // The first line already excludes the star. For all other lines,
      // adjust the line to exclude the star and (optionally) the first
      // whitespace.
      const int StarWidth = Lines[I].startswith("* ") ? 2 : 1;
      Lines[I] = Lines[I].substr(StarWidth);
      StartOfLineColumn[I] += StarWidth;
      LeadingWhitespace[I] += StarWidth;
    }
    // Exclude empty lines from the calculation of the left-most column.
    if (!Lines[I].empty())
      IndentAtLineBreak =
          std::min<int>(IndentAtLineBreak, StartOfLineColumn[I]);
  }

  DEBUG({
    for (size_t I = 0; I < Lines.size(); ++I) {
      llvm::dbgs() << I << " |" << Lines[I] << "| " << LeadingWhitespace[I]
                   << "\n";
    }
  });
}

void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,

258

unsigned LineIndex,

259

int IndentDelta) {

260

// Calculate the end of the non-whitespace text in the previous line.

261

size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");

262

if (EndOfPreviousLine == StringRef::npos)

263

EndOfPreviousLine = 0;

264

else

265

++EndOfPreviousLine;

266

// Calculate the start of the non-whitespace text in the current line.

267

size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");

268

if (StartOfLine == StringRef::npos)

269

StartOfLine = Lines[LineIndex].size();

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

270

271

// Adjust Lines to only contain relevant text.

272

Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);

273

Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);

274

// Adjust LeadingWhitespace to account all whitespace between the lines

275

// to the current line.

276

LeadingWhitespace[LineIndex] =

277

Lines[LineIndex].begin() - Lines[LineIndex - 1].end();

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

278

279

// FIXME: We currently count tabs as 1 character. To solve this, we need to

280

// get the correct indentation width of the start of the comment, which

281

// requires correct counting of the tab expansions before the comment, and

282

// a configurable tab width. Since the current implementation only breaks

283

// if leading tabs are intermixed with spaces, that is not a high priority.

284

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

285

// Adjust the start column uniformly accross all lines.

Manuel Klimek

d63312b

2013-05-28 10:01:59 +0000

[diff] [blame]

286

StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

287

}

288

289

/// \brief Returns the number of lines the block comment was split into.
unsigned BreakableBlockComment::getLineCount() const {
  return Lines.size();
}

290

291

unsigned

292

BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,

293

unsigned TailOffset) const {

294

return getContentStartColumn(LineIndex, TailOffset) +

295

(Lines[LineIndex].size() - TailOffset) +

296

// The last line gets a "*/" postfix.

297

(LineIndex + 1 == Lines.size() ? 2 : 0);

298

}

299

300

/// \brief Computes a split for the part of line \p LineIndex that follows
/// \p TailOffset by delegating to getCommentSplit().
BreakableToken::Split
BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
  const StringRef Remaining = Lines[LineIndex].substr(TailOffset);
  const unsigned ContentStart = getContentStartColumn(LineIndex, TailOffset);
  return getCommentSplit(Remaining, ContentStart, ColumnLimit);
}

void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

309

Split Split, bool InPPDirective,

310

WhitespaceManager &Whitespaces) {

311

StringRef Text = Lines[LineIndex].substr(TailOffset);

312

StringRef Prefix = Decoration;

313

if (LineIndex + 1 == Lines.size() &&

314

Text.size() == Split.first + Split.second) {

315

// For the last line we need to break before "*/", but not to add "* ".

Prefix = "";

}

unsigned BreakOffsetInToken =

320

Text.data() - Tok.TokenText.data() + Split.first;

321

unsigned CharsToRemove = Split.second;

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

322

assert(IndentAtLineBreak >= Decoration.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

323

Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,

324

InPPDirective, IndentAtLineBreak - Decoration.size());

}

void

BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,

329

unsigned InPPDirective,

330

WhitespaceManager &Whitespaces) {

331

if (LineIndex == 0)

332

return;

333

StringRef Prefix = Decoration;

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

334

if (Lines[LineIndex].empty()) {

335

if (LineIndex + 1 == Lines.size()) {

336

// If the last line is empty, we don't need a prefix, as the */ will line

337

// up with the decoration (if it exists).

338

Prefix = "";

339

} else if (!Decoration.empty()) {

340

// For other empty lines, if we do have a decoration, adapt it to not

341

// contain a trailing whitespace.

342

Prefix = Prefix.substr(0, 1);

343

}

Daniel Jasper

e2c482f

2013-05-30 06:40:07 +0000

[diff] [blame]

344

} else {

345

if (StartOfLineColumn[LineIndex] == 1) {

346

// This lines starts immediately after the decorating *.

347

Prefix = Prefix.substr(0, 1);

348

}

Manuel Klimek

c5cc4bf

2013-05-28 08:55:01 +0000

[diff] [blame]

349

}

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

350

351

unsigned WhitespaceOffsetInToken =

352

Lines[LineIndex].data() - Tok.TokenText.data() -

353

LeadingWhitespace[LineIndex];

Manuel Klimek

b6dba33

2013-05-30 07:45:53 +0000

[diff] [blame]

354

assert(StartOfLineColumn[LineIndex] >= Prefix.size());

Manuel Klimek

2013-05-27 15:23:34 +0000

[diff] [blame]

355

Whitespaces.breakToken(

356

Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,

357

InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());

}

unsigned

BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

362

unsigned TailOffset) const {

363

// If we break, we always break at the predefined indent.

364

if (TailOffset != 0)

365

return IndentAtLineBreak;

366

return StartOfLineColumn[LineIndex];

367

}

368

Alexander Kornienko