blob: f9acd2ac9194b69f7ea771b48bf5351eb2104a20 [file] [log] [blame]
Dmitri Gribenko2d44d772012-06-26 20:39:18 +00001#include "clang/AST/CommentLexer.h"
2#include "llvm/ADT/StringSwitch.h"
3#include "llvm/Support/ErrorHandling.h"
4
5namespace clang {
6namespace comments {
7
8void Token::dump(const Lexer &L, const SourceManager &SM) const {
9 llvm::errs() << "comments::Token Kind=" << Kind << " ";
10 Loc.dump(SM);
11 llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
12}
13
14bool Lexer::isVerbatimBlockCommand(StringRef BeginName,
15 StringRef &EndName) const {
16 const char *Result = llvm::StringSwitch<const char *>(BeginName)
17 .Case("code", "endcode")
18 .Case("verbatim", "endverbatim")
19 .Case("htmlonly", "endhtmlonly")
20 .Case("latexonly", "endlatexonly")
21 .Case("xmlonly", "endxmlonly")
22 .Case("manonly", "endmanonly")
23 .Case("rtfonly", "endrtfonly")
24
25 .Case("dot", "enddot")
26 .Case("msc", "endmsc")
27
28 .Case("f$", "f$") // Inline LaTeX formula
29 .Case("f[", "f]") // Displayed LaTeX formula
30 .Case("f{", "f}") // LaTeX environment
31
32 .Default(NULL);
33
34 if (Result) {
35 EndName = Result;
36 return true;
37 }
38
39 for (VerbatimBlockCommandVector::const_iterator
40 I = VerbatimBlockCommands.begin(),
41 E = VerbatimBlockCommands.end();
42 I != E; ++I)
43 if (I->BeginName == BeginName) {
44 EndName = I->EndName;
45 return true;
46 }
47
48 return false;
49}
50
51bool Lexer::isVerbatimLineCommand(StringRef Name) const {
52 bool Result = llvm::StringSwitch<bool>(Name)
53 .Case("fn", true)
54 .Case("var", true)
55 .Case("property", true)
56 .Case("typedef", true)
57
58 .Case("overload", true)
59
60 .Case("defgroup", true)
61 .Case("ingroup", true)
62 .Case("addtogroup", true)
63 .Case("weakgroup", true)
64 .Case("name", true)
65
66 .Case("section", true)
67 .Case("subsection", true)
68 .Case("subsubsection", true)
69 .Case("paragraph", true)
70
71 .Case("mainpage", true)
72 .Case("subpage", true)
73 .Case("ref", true)
74
75 .Default(false);
76
77 if (Result)
78 return true;
79
80 for (VerbatimLineCommandVector::const_iterator
81 I = VerbatimLineCommands.begin(),
82 E = VerbatimLineCommands.end();
83 I != E; ++I)
84 if (I->Name == Name)
85 return true;
86
87 return false;
88}
89
90void Lexer::skipLineStartingDecorations() {
91 // This function should be called only for C comments
92 assert(CommentState == LCS_InsideCComment);
93
94 if (BufferPtr == CommentEnd)
95 return;
96
97 switch (*BufferPtr) {
98 case ' ':
99 case '\t':
100 case '\f':
101 case '\v': {
102 const char *NewBufferPtr = BufferPtr;
103 NewBufferPtr++;
104 if (NewBufferPtr == CommentEnd)
105 return;
106
107 char C = *NewBufferPtr;
108 while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
109 NewBufferPtr++;
110 if (NewBufferPtr == CommentEnd)
111 return;
112 C = *NewBufferPtr;
113 }
114 if (C == '*')
115 BufferPtr = NewBufferPtr + 1;
116 break;
117 }
118 case '*':
119 BufferPtr++;
120 break;
121 }
122}
123
124namespace {
/// Find the first '\n' or '\r' in the range [BufferPtr, BufferEnd).
///
/// \returns pointer to the newline character, or \p BufferEnd if the range
/// contains none.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  const char *Cursor = BufferPtr;
  while (Cursor != BufferEnd && *Cursor != '\n' && *Cursor != '\r')
    ++Cursor;
  return Cursor;
}
133
/// Skip one newline sequence ("\n", "\r" or "\r\n") at \p BufferPtr.
///
/// \p BufferPtr must either be equal to \p BufferEnd or point at '\n' or
/// '\r' (checked with an assertion).
///
/// \returns pointer just past the newline sequence, or \p BufferPtr itself if
/// it is already at \p BufferEnd.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  const char First = *BufferPtr;
  ++BufferPtr;
  if (First == '\n')
    return BufferPtr;

  // A carriage return may be followed by a line feed belonging to the same
  // newline sequence.
  assert(First == '\r');
  if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    ++BufferPtr;
  return BufferPtr;
}
148
/// \returns true if \p C is an ASCII letter or decimal digit, i.e. a
/// character that may appear in an HTML tag or attribute name.
bool isHTMLIdentifierCharacter(char C) {
  if (C >= '0' && C <= '9')
    return true;
  if (C >= 'A' && C <= 'Z')
    return true;
  return C >= 'a' && C <= 'z';
}
154
155const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
156 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
157 if (!isHTMLIdentifierCharacter(*BufferPtr))
158 return BufferPtr;
159 }
160 return BufferEnd;
161}
162
/// Skip an HTML string quoted in single or double quotes.  \p BufferPtr must
/// point at the opening quote.  A quote character that is immediately
/// preceded by a backslash does not terminate the string.
///
/// \returns pointer to the closing quote, or \p BufferEnd if the string is
/// not terminated.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  const char *Cursor = BufferPtr + 1;
  while (Cursor != BufferEnd) {
    const bool IsEscaped = Cursor[-1] == '\\';
    if (*Cursor == Quote && !IsEscaped)
      return Cursor;
    ++Cursor;
  }
  return BufferEnd;
}
180
/// \returns true if \p C is a space, horizontal tab, form feed or vertical
/// tab.  Newline characters are not horizontal whitespace.
bool isHorizontalWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
184
/// \returns true if \p C is a space, '\n', '\r', horizontal tab, form feed or
/// vertical tab.
bool isWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\n':
  case '\r':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
189
190const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
191 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
192 if (!isWhitespace(*BufferPtr))
193 return BufferPtr;
194 }
195 return BufferEnd;
196}
197
/// \returns true if \p C is an ASCII letter or decimal digit, i.e. a
/// character that may appear in a command name.
bool isCommandNameCharacter(char C) {
  const bool IsLowercase = C >= 'a' && C <= 'z';
  const bool IsUppercase = C >= 'A' && C <= 'Z';
  const bool IsDigit = C >= '0' && C <= '9';
  return IsLowercase || IsUppercase || IsDigit;
}
203
204const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
205 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
206 if (!isCommandNameCharacter(*BufferPtr))
207 return BufferPtr;
208 }
209 return BufferEnd;
210}
211
212/// Return the one past end pointer for BCPL comments.
213/// Handles newlines escaped with backslash or trigraph for backslahs.
214const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
215 const char *CurPtr = BufferPtr;
216 while (CurPtr != BufferEnd) {
217 char C = *CurPtr;
218 while (C != '\n' && C != '\r') {
219 CurPtr++;
220 if (CurPtr == BufferEnd)
221 return BufferEnd;
222 C = *CurPtr;
223 }
224 // We found a newline, check if it is escaped.
225 const char *EscapePtr = CurPtr - 1;
226 while(isHorizontalWhitespace(*EscapePtr))
227 EscapePtr--;
228
229 if (*EscapePtr == '\\' ||
230 (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
231 EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
232 // We found an escaped newline.
233 CurPtr = skipNewline(CurPtr, BufferEnd);
234 } else
235 return CurPtr; // Not an escaped newline.
236 }
237 return BufferEnd;
238}
239
240/// Return the one past end pointer for C comments.
241/// Very dumb, does not handle escaped newlines or trigraphs.
242const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
243 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
244 if (*BufferPtr == '*') {
245 assert(BufferPtr + 1 != BufferEnd);
246 if (*(BufferPtr + 1) == '/')
247 return BufferPtr;
248 }
249 }
250 llvm_unreachable("buffer end hit before '*/' was seen");
251}
252} // unnamed namespace
253
254void Lexer::lexCommentText(Token &T) {
255 assert(CommentState == LCS_InsideBCPLComment ||
256 CommentState == LCS_InsideCComment);
257
258 switch (State) {
259 case LS_Normal:
260 break;
261 case LS_VerbatimBlockFirstLine:
262 lexVerbatimBlockFirstLine(T);
263 return;
264 case LS_VerbatimBlockBody:
265 lexVerbatimBlockBody(T);
266 return;
267 case LS_HTMLOpenTag:
268 lexHTMLOpenTag(T);
269 return;
270 }
271
272 assert(State == LS_Normal);
273
274 const char *TokenPtr = BufferPtr;
275 assert(TokenPtr < CommentEnd);
276 while (TokenPtr != CommentEnd) {
277 switch(*TokenPtr) {
278 case '\\':
279 case '@': {
280 TokenPtr++;
281 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000282 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000283 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000284 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000285 return;
286 }
287 char C = *TokenPtr;
288 switch (C) {
289 default:
290 break;
291
292 case '\\': case '@': case '&': case '$':
293 case '#': case '<': case '>': case '%':
294 case '\"': case '.': case ':':
295 // This is one of \\ \@ \& \$ etc escape sequences.
296 TokenPtr++;
297 if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
298 // This is the \:: escape sequence.
299 TokenPtr++;
300 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000301 StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000302 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000303 T.setText(UnescapedText);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000304 return;
305 }
306
307 // Don't make zero-length commands.
308 if (!isCommandNameCharacter(*TokenPtr)) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000309 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000310 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000311 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000312 return;
313 }
314
315 TokenPtr = skipCommandName(TokenPtr, CommentEnd);
316 unsigned Length = TokenPtr - (BufferPtr + 1);
317
318 // Hardcoded support for lexing LaTeX formula commands
319 // \f$ \f[ \f] \f{ \f} as a single command.
320 if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
321 C = *TokenPtr;
322 if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
323 TokenPtr++;
324 Length++;
325 }
326 }
327
328 const StringRef CommandName(BufferPtr + 1, Length);
329 StringRef EndName;
330
331 if (isVerbatimBlockCommand(CommandName, EndName)) {
332 setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
333 return;
334 }
335 if (isVerbatimLineCommand(CommandName)) {
336 lexVerbatimLine(T, TokenPtr);
337 return;
338 }
339 formTokenWithChars(T, TokenPtr, tok::command);
340 T.setCommandName(CommandName);
341 return;
342 }
343
344 case '<': {
345 TokenPtr++;
346 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000347 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000348 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000349 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000350 return;
351 }
352 const char C = *TokenPtr;
353 if (isHTMLIdentifierCharacter(C))
354 setupAndLexHTMLOpenTag(T);
355 else if (C == '/')
356 lexHTMLCloseTag(T);
357 return;
358 }
359
360 case '\n':
361 case '\r':
362 TokenPtr = skipNewline(TokenPtr, CommentEnd);
363 formTokenWithChars(T, TokenPtr, tok::newline);
364
365 if (CommentState == LCS_InsideCComment)
366 skipLineStartingDecorations();
367 return;
368
369 default: {
370 while (true) {
371 TokenPtr++;
372 if (TokenPtr == CommentEnd)
373 break;
374 char C = *TokenPtr;
375 if(C == '\n' || C == '\r' ||
376 C == '\\' || C == '@' || C == '<')
377 break;
378 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000379 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000380 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000381 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000382 return;
383 }
384 }
385 }
386}
387
388void Lexer::setupAndLexVerbatimBlock(Token &T,
389 const char *TextBegin,
390 char Marker, StringRef EndName) {
391 VerbatimBlockEndCommandName.clear();
392 VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
393 VerbatimBlockEndCommandName.append(EndName);
394
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000395 StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000396 formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000397 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000398
399 State = LS_VerbatimBlockFirstLine;
400}
401
402void Lexer::lexVerbatimBlockFirstLine(Token &T) {
403 assert(BufferPtr < CommentEnd);
404
405 // FIXME: It would be better to scan the text once, finding either the block
406 // end command or newline.
407 //
408 // Extract current line.
409 const char *Newline = findNewline(BufferPtr, CommentEnd);
410 StringRef Line(BufferPtr, Newline - BufferPtr);
411
412 // Look for end command in current line.
413 size_t Pos = Line.find(VerbatimBlockEndCommandName);
414 const char *NextLine;
415 if (Pos == StringRef::npos) {
416 // Current line is completely verbatim.
417 NextLine = skipNewline(Newline, CommentEnd);
418 } else if (Pos == 0) {
419 // Current line contains just an end command.
420 const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000421 StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000422 formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000423 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000424 State = LS_Normal;
425 return;
426 } else {
427 // There is some text, followed by end command. Extract text first.
428 NextLine = BufferPtr + Pos;
429 }
430
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000431 StringRef Text(BufferPtr, NextLine - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000432 formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000433 T.setVerbatimBlockText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000434
435 State = LS_VerbatimBlockBody;
436}
437
438void Lexer::lexVerbatimBlockBody(Token &T) {
439 assert(State == LS_VerbatimBlockBody);
440
441 if (CommentState == LCS_InsideCComment)
442 skipLineStartingDecorations();
443
444 lexVerbatimBlockFirstLine(T);
445}
446
447void Lexer::lexVerbatimLine(Token &T, const char *TextBegin) {
448 // Extract current line.
449 const char *Newline = findNewline(BufferPtr, CommentEnd);
450
451 const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
452 const StringRef Text(TextBegin, Newline - TextBegin);
453
454 formTokenWithChars(T, Newline, tok::verbatim_line);
455 T.setVerbatimLineName(Name);
456 T.setVerbatimLineText(Text);
457}
458
459void Lexer::setupAndLexHTMLOpenTag(Token &T) {
460 assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
461 const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
462
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000463 StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000464 formTokenWithChars(T, TagNameEnd, tok::html_tag_open);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000465 T.setHTMLTagOpenName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000466
467 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
468
469 if (BufferPtr != CommentEnd && *BufferPtr == '>') {
470 BufferPtr++;
471 return;
472 }
473
474 if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
475 State = LS_HTMLOpenTag;
476}
477
478void Lexer::lexHTMLOpenTag(Token &T) {
479 assert(State == LS_HTMLOpenTag);
480
481 const char *TokenPtr = BufferPtr;
482 char C = *TokenPtr;
483 if (isHTMLIdentifierCharacter(C)) {
484 TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000485 StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000486 formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000487 T.setHTMLIdent(Ident);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000488 } else {
489 switch (C) {
490 case '=':
491 TokenPtr++;
492 formTokenWithChars(T, TokenPtr, tok::html_equals);
493 break;
494 case '\"':
495 case '\'': {
496 const char *OpenQuote = TokenPtr;
497 TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
498 const char *ClosingQuote = TokenPtr;
499 if (TokenPtr != CommentEnd) // Skip closing quote.
500 TokenPtr++;
501 formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
502 T.setHTMLQuotedString(StringRef(OpenQuote + 1,
503 ClosingQuote - (OpenQuote + 1)));
504 break;
505 }
506 case '>':
507 TokenPtr++;
508 formTokenWithChars(T, TokenPtr, tok::html_greater);
509 break;
510 }
511 }
512
513 // Now look ahead and return to normal state if we don't see any HTML tokens
514 // ahead.
515 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
516 if (BufferPtr == CommentEnd) {
517 State = LS_Normal;
518 return;
519 }
520
521 C = *BufferPtr;
522 if (!isHTMLIdentifierCharacter(C) &&
523 C != '=' && C != '\"' && C != '\'' && C != '>') {
524 State = LS_Normal;
525 return;
526 }
527}
528
529void Lexer::lexHTMLCloseTag(Token &T) {
530 assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
531
532 const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
533 const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
534
535 const char *End = skipWhitespace(TagNameEnd, CommentEnd);
536 if (End != CommentEnd && *End == '>')
537 End++;
538
539 formTokenWithChars(T, End, tok::html_tag_close);
540 T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
541}
542
543Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
544 const char *BufferStart, const char *BufferEnd):
545 BufferStart(BufferStart), BufferEnd(BufferEnd),
546 FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
547 CommentState(LCS_BeforeComment), State(LS_Normal) {
548}
549
550void Lexer::lex(Token &T) {
551again:
552 switch (CommentState) {
553 case LCS_BeforeComment:
554 if (BufferPtr == BufferEnd) {
555 formTokenWithChars(T, BufferPtr, tok::eof);
556 return;
557 }
558
559 assert(*BufferPtr == '/');
560 BufferPtr++; // Skip first slash.
561 switch(*BufferPtr) {
562 case '/': { // BCPL comment.
563 BufferPtr++; // Skip second slash.
564
565 if (BufferPtr != BufferEnd) {
566 // Skip Doxygen magic marker, if it is present.
567 // It might be missing because of a typo //< or /*<, or because we
568 // merged this non-Doxygen comment into a bunch of Doxygen comments
569 // around it: /** ... */ /* ... */ /** ... */
570 const char C = *BufferPtr;
571 if (C == '/' || C == '!')
572 BufferPtr++;
573 }
574
575 // Skip less-than symbol that marks trailing comments.
576 // Skip it even if the comment is not a Doxygen one, because //< and /*<
577 // are frequent typos.
578 if (BufferPtr != BufferEnd && *BufferPtr == '<')
579 BufferPtr++;
580
581 CommentState = LCS_InsideBCPLComment;
582 State = LS_Normal;
583 CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
584 goto again;
585 }
586 case '*': { // C comment.
587 BufferPtr++; // Skip star.
588
589 // Skip Doxygen magic marker.
590 const char C = *BufferPtr;
591 if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
592 BufferPtr++;
593
594 // Skip less-than symbol that marks trailing comments.
595 if (BufferPtr != BufferEnd && *BufferPtr == '<')
596 BufferPtr++;
597
598 CommentState = LCS_InsideCComment;
599 State = LS_Normal;
600 CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
601 goto again;
602 }
603 default:
604 llvm_unreachable("second character of comment should be '/' or '*'");
605 }
606
607 case LCS_BetweenComments: {
608 // Consecutive comments are extracted only if there is only whitespace
609 // between them. So we can search for the start of the next comment.
610 const char *EndWhitespace = BufferPtr;
611 while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
612 EndWhitespace++;
613
614 // Turn any whitespace between comments (and there is only whitespace
615 // between them) into a newline. We have two newlines between comments
616 // in total (first one was synthesized after a comment).
617 formTokenWithChars(T, EndWhitespace, tok::newline);
618
619 CommentState = LCS_BeforeComment;
620 break;
621 }
622
623 case LCS_InsideBCPLComment:
624 case LCS_InsideCComment:
625 if (BufferPtr != CommentEnd) {
626 lexCommentText(T);
627 break;
628 } else {
629 // Skip C comment closing sequence.
630 if (CommentState == LCS_InsideCComment) {
631 assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
632 BufferPtr += 2;
633 assert(BufferPtr <= BufferEnd);
634
635 // Synthenize newline just after the C comment, regardless if there is
636 // actually a newline.
637 formTokenWithChars(T, BufferPtr, tok::newline);
638
639 CommentState = LCS_BetweenComments;
640 break;
641 } else {
642 // Don't synthesized a newline after BCPL comment.
643 CommentState = LCS_BetweenComments;
644 goto again;
645 }
646 }
647 }
648}
649
650StringRef Lexer::getSpelling(const Token &Tok,
651 const SourceManager &SourceMgr,
652 bool *Invalid) const {
653 SourceLocation Loc = Tok.getLocation();
654 std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
655
656 bool InvalidTemp = false;
657 StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
658 if (InvalidTemp) {
659 *Invalid = true;
660 return StringRef();
661 }
662
663 const char *Begin = File.data() + LocInfo.second;
664 return StringRef(Begin, Tok.getLength());
665}
666
667void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) {
668 VerbatimBlockCommand VBC;
669 VBC.BeginName = BeginName;
670 VBC.EndName = EndName;
671 VerbatimBlockCommands.push_back(VBC);
672}
673
674void Lexer::addVerbatimLineCommand(StringRef Name) {
675 VerbatimLineCommand VLC;
676 VLC.Name = Name;
677 VerbatimLineCommands.push_back(VLC);
678}
679
680} // end namespace comments
681} // end namespace clang
682