blob: 0b76050ff0bc92edfc6df6ca125d17e5a78bfe8d [file] [log] [blame]
Dmitri Gribenko2d44d772012-06-26 20:39:18 +00001#include "clang/AST/CommentLexer.h"
2#include "llvm/ADT/StringSwitch.h"
3#include "llvm/Support/ErrorHandling.h"
4
5namespace clang {
6namespace comments {
7
8void Token::dump(const Lexer &L, const SourceManager &SM) const {
9 llvm::errs() << "comments::Token Kind=" << Kind << " ";
10 Loc.dump(SM);
11 llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
12}
13
14bool Lexer::isVerbatimBlockCommand(StringRef BeginName,
15 StringRef &EndName) const {
16 const char *Result = llvm::StringSwitch<const char *>(BeginName)
17 .Case("code", "endcode")
18 .Case("verbatim", "endverbatim")
19 .Case("htmlonly", "endhtmlonly")
20 .Case("latexonly", "endlatexonly")
21 .Case("xmlonly", "endxmlonly")
22 .Case("manonly", "endmanonly")
23 .Case("rtfonly", "endrtfonly")
24
25 .Case("dot", "enddot")
26 .Case("msc", "endmsc")
27
28 .Case("f$", "f$") // Inline LaTeX formula
29 .Case("f[", "f]") // Displayed LaTeX formula
30 .Case("f{", "f}") // LaTeX environment
31
32 .Default(NULL);
33
34 if (Result) {
35 EndName = Result;
36 return true;
37 }
38
39 for (VerbatimBlockCommandVector::const_iterator
40 I = VerbatimBlockCommands.begin(),
41 E = VerbatimBlockCommands.end();
42 I != E; ++I)
43 if (I->BeginName == BeginName) {
44 EndName = I->EndName;
45 return true;
46 }
47
48 return false;
49}
50
51bool Lexer::isVerbatimLineCommand(StringRef Name) const {
52 bool Result = llvm::StringSwitch<bool>(Name)
53 .Case("fn", true)
54 .Case("var", true)
55 .Case("property", true)
56 .Case("typedef", true)
57
58 .Case("overload", true)
59
60 .Case("defgroup", true)
61 .Case("ingroup", true)
62 .Case("addtogroup", true)
63 .Case("weakgroup", true)
64 .Case("name", true)
65
66 .Case("section", true)
67 .Case("subsection", true)
68 .Case("subsubsection", true)
69 .Case("paragraph", true)
70
71 .Case("mainpage", true)
72 .Case("subpage", true)
73 .Case("ref", true)
74
75 .Default(false);
76
77 if (Result)
78 return true;
79
80 for (VerbatimLineCommandVector::const_iterator
81 I = VerbatimLineCommands.begin(),
82 E = VerbatimLineCommands.end();
83 I != E; ++I)
84 if (I->Name == Name)
85 return true;
86
87 return false;
88}
89
90void Lexer::skipLineStartingDecorations() {
91 // This function should be called only for C comments
92 assert(CommentState == LCS_InsideCComment);
93
94 if (BufferPtr == CommentEnd)
95 return;
96
97 switch (*BufferPtr) {
98 case ' ':
99 case '\t':
100 case '\f':
101 case '\v': {
102 const char *NewBufferPtr = BufferPtr;
103 NewBufferPtr++;
104 if (NewBufferPtr == CommentEnd)
105 return;
106
107 char C = *NewBufferPtr;
108 while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
109 NewBufferPtr++;
110 if (NewBufferPtr == CommentEnd)
111 return;
112 C = *NewBufferPtr;
113 }
114 if (C == '*')
115 BufferPtr = NewBufferPtr + 1;
116 break;
117 }
118 case '*':
119 BufferPtr++;
120 break;
121 }
122}
123
124namespace {
/// Return a pointer to the first '\n' or '\r' in [BufferPtr, BufferEnd), or
/// BufferEnd if the range contains neither.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  const char *Cursor = BufferPtr;
  while (Cursor != BufferEnd && *Cursor != '\n' && *Cursor != '\r')
    ++Cursor;
  return Cursor;
}
133
/// Skip one newline sequence ("\n", "\r" or "\r\n") starting at BufferPtr and
/// return the pointer just past it.  An empty range is returned unchanged.
/// The first character is expected to be '\n' or '\r'.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  const char First = *BufferPtr;
  const char *Next = BufferPtr + 1;
  if (First == '\n')
    return Next;

  assert(First == '\r');
  // Treat "\r\n" as a single newline.
  if (Next != BufferEnd && *Next == '\n')
    ++Next;
  return Next;
}
148
/// Return true if \p C is an ASCII letter or an ASCII digit.
bool isHTMLIdentifierCharacter(char C) {
  if (C >= '0' && C <= '9')
    return true;
  if (C >= 'A' && C <= 'Z')
    return true;
  return C >= 'a' && C <= 'z';
}
154
155const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
156 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
157 if (!isHTMLIdentifierCharacter(*BufferPtr))
158 return BufferPtr;
159 }
160 return BufferEnd;
161}
162
/// Skip an HTML string quoted with single or double quotes.  BufferPtr must
/// point at the opening quote.  A quote character directly preceded by a
/// backslash does not terminate the string.
///
/// Returns a pointer to the closing quote, or BufferEnd if the string is not
/// terminated.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  const char *Cursor = BufferPtr + 1;
  while (Cursor != BufferEnd) {
    const bool IsUnescapedQuote = *Cursor == Quote && Cursor[-1] != '\\';
    if (IsUnescapedQuote)
      break;
    ++Cursor;
  }
  return Cursor;
}
180
/// Return true if \p C is a space, tab, form feed or vertical tab.
bool isHorizontalWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
184
/// Return true if \p C is a space, tab, form feed, vertical tab, line feed or
/// carriage return.
bool isWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\n':
  case '\r':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
189
190const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
191 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
192 if (!isWhitespace(*BufferPtr))
193 return BufferPtr;
194 }
195 return BufferEnd;
196}
197
/// Return true if \p C may appear in a command name: an ASCII letter or an
/// ASCII digit.
bool isCommandNameCharacter(char C) {
  const bool IsDigit = C >= '0' && C <= '9';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  const bool IsLower = C >= 'a' && C <= 'z';
  return IsLower || IsUpper || IsDigit;
}
203
204const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
205 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
206 if (!isCommandNameCharacter(*BufferPtr))
207 return BufferPtr;
208 }
209 return BufferEnd;
210}
211
212/// Return the one past end pointer for BCPL comments.
213/// Handles newlines escaped with backslash or trigraph for backslahs.
214const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
215 const char *CurPtr = BufferPtr;
216 while (CurPtr != BufferEnd) {
217 char C = *CurPtr;
218 while (C != '\n' && C != '\r') {
219 CurPtr++;
220 if (CurPtr == BufferEnd)
221 return BufferEnd;
222 C = *CurPtr;
223 }
224 // We found a newline, check if it is escaped.
225 const char *EscapePtr = CurPtr - 1;
226 while(isHorizontalWhitespace(*EscapePtr))
227 EscapePtr--;
228
229 if (*EscapePtr == '\\' ||
230 (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
231 EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
232 // We found an escaped newline.
233 CurPtr = skipNewline(CurPtr, BufferEnd);
234 } else
235 return CurPtr; // Not an escaped newline.
236 }
237 return BufferEnd;
238}
239
240/// Return the one past end pointer for C comments.
241/// Very dumb, does not handle escaped newlines or trigraphs.
242const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
243 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
244 if (*BufferPtr == '*') {
245 assert(BufferPtr + 1 != BufferEnd);
246 if (*(BufferPtr + 1) == '/')
247 return BufferPtr;
248 }
249 }
250 llvm_unreachable("buffer end hit before '*/' was seen");
251}
252} // unnamed namespace
253
254void Lexer::lexCommentText(Token &T) {
255 assert(CommentState == LCS_InsideBCPLComment ||
256 CommentState == LCS_InsideCComment);
257
258 switch (State) {
259 case LS_Normal:
260 break;
261 case LS_VerbatimBlockFirstLine:
262 lexVerbatimBlockFirstLine(T);
263 return;
264 case LS_VerbatimBlockBody:
265 lexVerbatimBlockBody(T);
266 return;
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000267 case LS_VerbatimLineText:
268 lexVerbatimLineText(T);
269 return;
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000270 case LS_HTMLOpenTag:
271 lexHTMLOpenTag(T);
272 return;
273 }
274
275 assert(State == LS_Normal);
276
277 const char *TokenPtr = BufferPtr;
278 assert(TokenPtr < CommentEnd);
279 while (TokenPtr != CommentEnd) {
280 switch(*TokenPtr) {
281 case '\\':
282 case '@': {
283 TokenPtr++;
284 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000285 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000286 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000287 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000288 return;
289 }
290 char C = *TokenPtr;
291 switch (C) {
292 default:
293 break;
294
295 case '\\': case '@': case '&': case '$':
296 case '#': case '<': case '>': case '%':
297 case '\"': case '.': case ':':
298 // This is one of \\ \@ \& \$ etc escape sequences.
299 TokenPtr++;
300 if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
301 // This is the \:: escape sequence.
302 TokenPtr++;
303 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000304 StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000305 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000306 T.setText(UnescapedText);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000307 return;
308 }
309
310 // Don't make zero-length commands.
311 if (!isCommandNameCharacter(*TokenPtr)) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000312 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000313 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000314 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000315 return;
316 }
317
318 TokenPtr = skipCommandName(TokenPtr, CommentEnd);
319 unsigned Length = TokenPtr - (BufferPtr + 1);
320
321 // Hardcoded support for lexing LaTeX formula commands
322 // \f$ \f[ \f] \f{ \f} as a single command.
323 if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
324 C = *TokenPtr;
325 if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
326 TokenPtr++;
327 Length++;
328 }
329 }
330
331 const StringRef CommandName(BufferPtr + 1, Length);
332 StringRef EndName;
333
334 if (isVerbatimBlockCommand(CommandName, EndName)) {
335 setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
336 return;
337 }
338 if (isVerbatimLineCommand(CommandName)) {
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000339 setupAndLexVerbatimLine(T, TokenPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000340 return;
341 }
342 formTokenWithChars(T, TokenPtr, tok::command);
343 T.setCommandName(CommandName);
344 return;
345 }
346
347 case '<': {
348 TokenPtr++;
349 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000350 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000351 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000352 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000353 return;
354 }
355 const char C = *TokenPtr;
356 if (isHTMLIdentifierCharacter(C))
357 setupAndLexHTMLOpenTag(T);
358 else if (C == '/')
359 lexHTMLCloseTag(T);
360 return;
361 }
362
363 case '\n':
364 case '\r':
365 TokenPtr = skipNewline(TokenPtr, CommentEnd);
366 formTokenWithChars(T, TokenPtr, tok::newline);
367
368 if (CommentState == LCS_InsideCComment)
369 skipLineStartingDecorations();
370 return;
371
372 default: {
373 while (true) {
374 TokenPtr++;
375 if (TokenPtr == CommentEnd)
376 break;
377 char C = *TokenPtr;
378 if(C == '\n' || C == '\r' ||
379 C == '\\' || C == '@' || C == '<')
380 break;
381 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000382 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000383 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000384 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000385 return;
386 }
387 }
388 }
389}
390
391void Lexer::setupAndLexVerbatimBlock(Token &T,
392 const char *TextBegin,
393 char Marker, StringRef EndName) {
394 VerbatimBlockEndCommandName.clear();
395 VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
396 VerbatimBlockEndCommandName.append(EndName);
397
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000398 StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000399 formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000400 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000401
402 State = LS_VerbatimBlockFirstLine;
403}
404
405void Lexer::lexVerbatimBlockFirstLine(Token &T) {
406 assert(BufferPtr < CommentEnd);
407
408 // FIXME: It would be better to scan the text once, finding either the block
409 // end command or newline.
410 //
411 // Extract current line.
412 const char *Newline = findNewline(BufferPtr, CommentEnd);
413 StringRef Line(BufferPtr, Newline - BufferPtr);
414
415 // Look for end command in current line.
416 size_t Pos = Line.find(VerbatimBlockEndCommandName);
417 const char *NextLine;
418 if (Pos == StringRef::npos) {
419 // Current line is completely verbatim.
420 NextLine = skipNewline(Newline, CommentEnd);
421 } else if (Pos == 0) {
422 // Current line contains just an end command.
423 const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000424 StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000425 formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000426 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000427 State = LS_Normal;
428 return;
429 } else {
430 // There is some text, followed by end command. Extract text first.
431 NextLine = BufferPtr + Pos;
432 }
433
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000434 StringRef Text(BufferPtr, NextLine - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000435 formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000436 T.setVerbatimBlockText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000437
438 State = LS_VerbatimBlockBody;
439}
440
441void Lexer::lexVerbatimBlockBody(Token &T) {
442 assert(State == LS_VerbatimBlockBody);
443
444 if (CommentState == LCS_InsideCComment)
445 skipLineStartingDecorations();
446
447 lexVerbatimBlockFirstLine(T);
448}
449
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000450void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) {
451 const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
452 formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
453 T.setVerbatimLineName(Name);
454
455 State = LS_VerbatimLineText;
456}
457
458void Lexer::lexVerbatimLineText(Token &T) {
459 assert(State == LS_VerbatimLineText);
460
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000461 // Extract current line.
462 const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000463 const StringRef Text(BufferPtr, Newline - BufferPtr);
464 formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000465 T.setVerbatimLineText(Text);
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000466
467 State = LS_Normal;
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000468}
469
470void Lexer::setupAndLexHTMLOpenTag(Token &T) {
471 assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
472 const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
473
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000474 StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000475 formTokenWithChars(T, TagNameEnd, tok::html_tag_open);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000476 T.setHTMLTagOpenName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000477
478 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
479
480 if (BufferPtr != CommentEnd && *BufferPtr == '>') {
481 BufferPtr++;
482 return;
483 }
484
485 if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
486 State = LS_HTMLOpenTag;
487}
488
489void Lexer::lexHTMLOpenTag(Token &T) {
490 assert(State == LS_HTMLOpenTag);
491
492 const char *TokenPtr = BufferPtr;
493 char C = *TokenPtr;
494 if (isHTMLIdentifierCharacter(C)) {
495 TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000496 StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000497 formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000498 T.setHTMLIdent(Ident);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000499 } else {
500 switch (C) {
501 case '=':
502 TokenPtr++;
503 formTokenWithChars(T, TokenPtr, tok::html_equals);
504 break;
505 case '\"':
506 case '\'': {
507 const char *OpenQuote = TokenPtr;
508 TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
509 const char *ClosingQuote = TokenPtr;
510 if (TokenPtr != CommentEnd) // Skip closing quote.
511 TokenPtr++;
512 formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
513 T.setHTMLQuotedString(StringRef(OpenQuote + 1,
514 ClosingQuote - (OpenQuote + 1)));
515 break;
516 }
517 case '>':
518 TokenPtr++;
519 formTokenWithChars(T, TokenPtr, tok::html_greater);
520 break;
521 }
522 }
523
524 // Now look ahead and return to normal state if we don't see any HTML tokens
525 // ahead.
526 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
527 if (BufferPtr == CommentEnd) {
528 State = LS_Normal;
529 return;
530 }
531
532 C = *BufferPtr;
533 if (!isHTMLIdentifierCharacter(C) &&
534 C != '=' && C != '\"' && C != '\'' && C != '>') {
535 State = LS_Normal;
536 return;
537 }
538}
539
540void Lexer::lexHTMLCloseTag(Token &T) {
541 assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
542
543 const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
544 const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
545
546 const char *End = skipWhitespace(TagNameEnd, CommentEnd);
547 if (End != CommentEnd && *End == '>')
548 End++;
549
550 formTokenWithChars(T, End, tok::html_tag_close);
551 T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
552}
553
554Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
555 const char *BufferStart, const char *BufferEnd):
556 BufferStart(BufferStart), BufferEnd(BufferEnd),
557 FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
558 CommentState(LCS_BeforeComment), State(LS_Normal) {
559}
560
561void Lexer::lex(Token &T) {
562again:
563 switch (CommentState) {
564 case LCS_BeforeComment:
565 if (BufferPtr == BufferEnd) {
566 formTokenWithChars(T, BufferPtr, tok::eof);
567 return;
568 }
569
570 assert(*BufferPtr == '/');
571 BufferPtr++; // Skip first slash.
572 switch(*BufferPtr) {
573 case '/': { // BCPL comment.
574 BufferPtr++; // Skip second slash.
575
576 if (BufferPtr != BufferEnd) {
577 // Skip Doxygen magic marker, if it is present.
578 // It might be missing because of a typo //< or /*<, or because we
579 // merged this non-Doxygen comment into a bunch of Doxygen comments
580 // around it: /** ... */ /* ... */ /** ... */
581 const char C = *BufferPtr;
582 if (C == '/' || C == '!')
583 BufferPtr++;
584 }
585
586 // Skip less-than symbol that marks trailing comments.
587 // Skip it even if the comment is not a Doxygen one, because //< and /*<
588 // are frequent typos.
589 if (BufferPtr != BufferEnd && *BufferPtr == '<')
590 BufferPtr++;
591
592 CommentState = LCS_InsideBCPLComment;
593 State = LS_Normal;
594 CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
595 goto again;
596 }
597 case '*': { // C comment.
598 BufferPtr++; // Skip star.
599
600 // Skip Doxygen magic marker.
601 const char C = *BufferPtr;
602 if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
603 BufferPtr++;
604
605 // Skip less-than symbol that marks trailing comments.
606 if (BufferPtr != BufferEnd && *BufferPtr == '<')
607 BufferPtr++;
608
609 CommentState = LCS_InsideCComment;
610 State = LS_Normal;
611 CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
612 goto again;
613 }
614 default:
615 llvm_unreachable("second character of comment should be '/' or '*'");
616 }
617
618 case LCS_BetweenComments: {
619 // Consecutive comments are extracted only if there is only whitespace
620 // between them. So we can search for the start of the next comment.
621 const char *EndWhitespace = BufferPtr;
622 while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
623 EndWhitespace++;
624
625 // Turn any whitespace between comments (and there is only whitespace
626 // between them) into a newline. We have two newlines between comments
627 // in total (first one was synthesized after a comment).
628 formTokenWithChars(T, EndWhitespace, tok::newline);
629
630 CommentState = LCS_BeforeComment;
631 break;
632 }
633
634 case LCS_InsideBCPLComment:
635 case LCS_InsideCComment:
636 if (BufferPtr != CommentEnd) {
637 lexCommentText(T);
638 break;
639 } else {
640 // Skip C comment closing sequence.
641 if (CommentState == LCS_InsideCComment) {
642 assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
643 BufferPtr += 2;
644 assert(BufferPtr <= BufferEnd);
645
646 // Synthenize newline just after the C comment, regardless if there is
647 // actually a newline.
648 formTokenWithChars(T, BufferPtr, tok::newline);
649
650 CommentState = LCS_BetweenComments;
651 break;
652 } else {
653 // Don't synthesized a newline after BCPL comment.
654 CommentState = LCS_BetweenComments;
655 goto again;
656 }
657 }
658 }
659}
660
661StringRef Lexer::getSpelling(const Token &Tok,
662 const SourceManager &SourceMgr,
663 bool *Invalid) const {
664 SourceLocation Loc = Tok.getLocation();
665 std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
666
667 bool InvalidTemp = false;
668 StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
669 if (InvalidTemp) {
670 *Invalid = true;
671 return StringRef();
672 }
673
674 const char *Begin = File.data() + LocInfo.second;
675 return StringRef(Begin, Tok.getLength());
676}
677
678void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) {
679 VerbatimBlockCommand VBC;
680 VBC.BeginName = BeginName;
681 VBC.EndName = EndName;
682 VerbatimBlockCommands.push_back(VBC);
683}
684
685void Lexer::addVerbatimLineCommand(StringRef Name) {
686 VerbatimLineCommand VLC;
687 VLC.Name = Name;
688 VerbatimLineCommands.push_back(VLC);
689}
690
691} // end namespace comments
692} // end namespace clang
693