blob: c3a801d924c24b944f129b9c7b7752f11a6a9881 [file] [log] [blame]
Dmitri Gribenko2d44d772012-06-26 20:39:18 +00001#include "clang/AST/CommentLexer.h"
2#include "llvm/ADT/StringSwitch.h"
3#include "llvm/Support/ErrorHandling.h"
4
5namespace clang {
6namespace comments {
7
8void Token::dump(const Lexer &L, const SourceManager &SM) const {
9 llvm::errs() << "comments::Token Kind=" << Kind << " ";
10 Loc.dump(SM);
11 llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
12}
13
14bool Lexer::isVerbatimBlockCommand(StringRef BeginName,
15 StringRef &EndName) const {
16 const char *Result = llvm::StringSwitch<const char *>(BeginName)
17 .Case("code", "endcode")
18 .Case("verbatim", "endverbatim")
19 .Case("htmlonly", "endhtmlonly")
20 .Case("latexonly", "endlatexonly")
21 .Case("xmlonly", "endxmlonly")
22 .Case("manonly", "endmanonly")
23 .Case("rtfonly", "endrtfonly")
24
25 .Case("dot", "enddot")
26 .Case("msc", "endmsc")
27
28 .Case("f$", "f$") // Inline LaTeX formula
29 .Case("f[", "f]") // Displayed LaTeX formula
30 .Case("f{", "f}") // LaTeX environment
31
32 .Default(NULL);
33
34 if (Result) {
35 EndName = Result;
36 return true;
37 }
38
39 for (VerbatimBlockCommandVector::const_iterator
40 I = VerbatimBlockCommands.begin(),
41 E = VerbatimBlockCommands.end();
42 I != E; ++I)
43 if (I->BeginName == BeginName) {
44 EndName = I->EndName;
45 return true;
46 }
47
48 return false;
49}
50
51bool Lexer::isVerbatimLineCommand(StringRef Name) const {
52 bool Result = llvm::StringSwitch<bool>(Name)
53 .Case("fn", true)
54 .Case("var", true)
55 .Case("property", true)
56 .Case("typedef", true)
57
58 .Case("overload", true)
59
60 .Case("defgroup", true)
61 .Case("ingroup", true)
62 .Case("addtogroup", true)
63 .Case("weakgroup", true)
64 .Case("name", true)
65
66 .Case("section", true)
67 .Case("subsection", true)
68 .Case("subsubsection", true)
69 .Case("paragraph", true)
70
71 .Case("mainpage", true)
72 .Case("subpage", true)
73 .Case("ref", true)
74
75 .Default(false);
76
77 if (Result)
78 return true;
79
80 for (VerbatimLineCommandVector::const_iterator
81 I = VerbatimLineCommands.begin(),
82 E = VerbatimLineCommands.end();
83 I != E; ++I)
84 if (I->Name == Name)
85 return true;
86
87 return false;
88}
89
90void Lexer::skipLineStartingDecorations() {
91 // This function should be called only for C comments
92 assert(CommentState == LCS_InsideCComment);
93
94 if (BufferPtr == CommentEnd)
95 return;
96
97 switch (*BufferPtr) {
98 case ' ':
99 case '\t':
100 case '\f':
101 case '\v': {
102 const char *NewBufferPtr = BufferPtr;
103 NewBufferPtr++;
104 if (NewBufferPtr == CommentEnd)
105 return;
106
107 char C = *NewBufferPtr;
108 while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
109 NewBufferPtr++;
110 if (NewBufferPtr == CommentEnd)
111 return;
112 C = *NewBufferPtr;
113 }
114 if (C == '*')
115 BufferPtr = NewBufferPtr + 1;
116 break;
117 }
118 case '*':
119 BufferPtr++;
120 break;
121 }
122}
123
124namespace {
/// Return a pointer to the first '\n' or '\r' in [BufferPtr, BufferEnd), or
/// BufferEnd if the range contains neither.
const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
  while (BufferPtr != BufferEnd && *BufferPtr != '\n' && *BufferPtr != '\r')
    ++BufferPtr;
  return BufferPtr;
}
133
/// Skip one newline sequence ("\n", "\r" or "\r\n") at BufferPtr and return
/// the pointer just past it.  Returns BufferPtr unchanged when it already
/// equals BufferEnd.  The caller must be positioned on a newline character.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  const char First = *BufferPtr;
  ++BufferPtr;
  if (First == '\n')
    return BufferPtr;

  assert(First == '\r');
  // Treat "\r\n" as a single newline.
  if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    ++BufferPtr;
  return BufferPtr;
}
148
/// Return true if \p C is an ASCII letter or an ASCII digit.
bool isHTMLIdentifierCharacter(char C) {
  if (C >= '0' && C <= '9')
    return true;
  if (C >= 'A' && C <= 'Z')
    return true;
  return C >= 'a' && C <= 'z';
}
154
155const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
156 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
157 if (!isHTMLIdentifierCharacter(*BufferPtr))
158 return BufferPtr;
159 }
160 return BufferEnd;
161}
162
/// Skip an HTML string quoted in single or double quotes.  \p BufferPtr must
/// point at the opening quote.  A quote character that is immediately preceded
/// by a backslash does not terminate the string.
///
/// Returns a pointer to the closing quote, or BufferEnd if the string is not
/// terminated.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  const char *Cur = BufferPtr + 1;
  while (Cur != BufferEnd) {
    if (*Cur == Quote && Cur[-1] != '\\')
      return Cur;
    ++Cur;
  }
  return BufferEnd;
}
180
/// Return true if \p C is a space, horizontal tab, form feed or vertical tab.
bool isHorizontalWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
184
/// Return true if \p C is a space, a newline character ('\n' or '\r'), a
/// horizontal tab, a form feed or a vertical tab.
bool isWhitespace(char C) {
  switch (C) {
  case ' ':
  case '\n':
  case '\r':
  case '\t':
  case '\f':
  case '\v':
    return true;
  default:
    return false;
  }
}
189
190const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
191 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
192 if (!isWhitespace(*BufferPtr))
193 return BufferPtr;
194 }
195 return BufferEnd;
196}
197
/// Return true if \p C may appear in a command name, i.e. it is an ASCII
/// letter or an ASCII digit.
bool isCommandNameCharacter(char C) {
  if (C >= '0' && C <= '9')
    return true;
  if (C >= 'A' && C <= 'Z')
    return true;
  return C >= 'a' && C <= 'z';
}
203
204const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
205 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
206 if (!isCommandNameCharacter(*BufferPtr))
207 return BufferPtr;
208 }
209 return BufferEnd;
210}
211
212/// Return the one past end pointer for BCPL comments.
213/// Handles newlines escaped with backslash or trigraph for backslahs.
214const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
215 const char *CurPtr = BufferPtr;
216 while (CurPtr != BufferEnd) {
217 char C = *CurPtr;
218 while (C != '\n' && C != '\r') {
219 CurPtr++;
220 if (CurPtr == BufferEnd)
221 return BufferEnd;
222 C = *CurPtr;
223 }
224 // We found a newline, check if it is escaped.
225 const char *EscapePtr = CurPtr - 1;
226 while(isHorizontalWhitespace(*EscapePtr))
227 EscapePtr--;
228
229 if (*EscapePtr == '\\' ||
230 (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
231 EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
232 // We found an escaped newline.
233 CurPtr = skipNewline(CurPtr, BufferEnd);
234 } else
235 return CurPtr; // Not an escaped newline.
236 }
237 return BufferEnd;
238}
239
240/// Return the one past end pointer for C comments.
241/// Very dumb, does not handle escaped newlines or trigraphs.
242const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
243 for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
244 if (*BufferPtr == '*') {
245 assert(BufferPtr + 1 != BufferEnd);
246 if (*(BufferPtr + 1) == '/')
247 return BufferPtr;
248 }
249 }
250 llvm_unreachable("buffer end hit before '*/' was seen");
251}
252} // unnamed namespace
253
254void Lexer::lexCommentText(Token &T) {
255 assert(CommentState == LCS_InsideBCPLComment ||
256 CommentState == LCS_InsideCComment);
257
258 switch (State) {
259 case LS_Normal:
260 break;
261 case LS_VerbatimBlockFirstLine:
262 lexVerbatimBlockFirstLine(T);
263 return;
264 case LS_VerbatimBlockBody:
265 lexVerbatimBlockBody(T);
266 return;
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000267 case LS_VerbatimLineText:
268 lexVerbatimLineText(T);
269 return;
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000270 case LS_HTMLOpenTag:
271 lexHTMLOpenTag(T);
272 return;
273 }
274
275 assert(State == LS_Normal);
276
277 const char *TokenPtr = BufferPtr;
278 assert(TokenPtr < CommentEnd);
279 while (TokenPtr != CommentEnd) {
280 switch(*TokenPtr) {
281 case '\\':
282 case '@': {
283 TokenPtr++;
284 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000285 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000286 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000287 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000288 return;
289 }
290 char C = *TokenPtr;
291 switch (C) {
292 default:
293 break;
294
295 case '\\': case '@': case '&': case '$':
296 case '#': case '<': case '>': case '%':
297 case '\"': case '.': case ':':
298 // This is one of \\ \@ \& \$ etc escape sequences.
299 TokenPtr++;
300 if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
301 // This is the \:: escape sequence.
302 TokenPtr++;
303 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000304 StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000305 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000306 T.setText(UnescapedText);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000307 return;
308 }
309
310 // Don't make zero-length commands.
311 if (!isCommandNameCharacter(*TokenPtr)) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000312 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000313 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000314 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000315 return;
316 }
317
318 TokenPtr = skipCommandName(TokenPtr, CommentEnd);
319 unsigned Length = TokenPtr - (BufferPtr + 1);
320
321 // Hardcoded support for lexing LaTeX formula commands
322 // \f$ \f[ \f] \f{ \f} as a single command.
323 if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
324 C = *TokenPtr;
325 if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
326 TokenPtr++;
327 Length++;
328 }
329 }
330
331 const StringRef CommandName(BufferPtr + 1, Length);
332 StringRef EndName;
333
334 if (isVerbatimBlockCommand(CommandName, EndName)) {
335 setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
336 return;
337 }
338 if (isVerbatimLineCommand(CommandName)) {
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000339 setupAndLexVerbatimLine(T, TokenPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000340 return;
341 }
342 formTokenWithChars(T, TokenPtr, tok::command);
343 T.setCommandName(CommandName);
344 return;
345 }
346
347 case '<': {
348 TokenPtr++;
349 if (TokenPtr == CommentEnd) {
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000350 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000351 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000352 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000353 return;
354 }
355 const char C = *TokenPtr;
356 if (isHTMLIdentifierCharacter(C))
357 setupAndLexHTMLOpenTag(T);
358 else if (C == '/')
359 lexHTMLCloseTag(T);
Dmitri Gribenko5676d322012-06-27 23:28:29 +0000360 else {
361 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
362 formTokenWithChars(T, TokenPtr, tok::text);
363 T.setText(Text);
364 }
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000365 return;
366 }
367
368 case '\n':
369 case '\r':
370 TokenPtr = skipNewline(TokenPtr, CommentEnd);
371 formTokenWithChars(T, TokenPtr, tok::newline);
372
373 if (CommentState == LCS_InsideCComment)
374 skipLineStartingDecorations();
375 return;
376
377 default: {
378 while (true) {
379 TokenPtr++;
380 if (TokenPtr == CommentEnd)
381 break;
382 char C = *TokenPtr;
383 if(C == '\n' || C == '\r' ||
384 C == '\\' || C == '@' || C == '<')
385 break;
386 }
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000387 StringRef Text(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000388 formTokenWithChars(T, TokenPtr, tok::text);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000389 T.setText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000390 return;
391 }
392 }
393 }
394}
395
396void Lexer::setupAndLexVerbatimBlock(Token &T,
397 const char *TextBegin,
398 char Marker, StringRef EndName) {
399 VerbatimBlockEndCommandName.clear();
400 VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
401 VerbatimBlockEndCommandName.append(EndName);
402
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000403 StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000404 formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000405 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000406
407 State = LS_VerbatimBlockFirstLine;
408}
409
410void Lexer::lexVerbatimBlockFirstLine(Token &T) {
411 assert(BufferPtr < CommentEnd);
412
413 // FIXME: It would be better to scan the text once, finding either the block
414 // end command or newline.
415 //
416 // Extract current line.
417 const char *Newline = findNewline(BufferPtr, CommentEnd);
418 StringRef Line(BufferPtr, Newline - BufferPtr);
419
420 // Look for end command in current line.
421 size_t Pos = Line.find(VerbatimBlockEndCommandName);
422 const char *NextLine;
423 if (Pos == StringRef::npos) {
424 // Current line is completely verbatim.
425 NextLine = skipNewline(Newline, CommentEnd);
426 } else if (Pos == 0) {
427 // Current line contains just an end command.
428 const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000429 StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000430 formTokenWithChars(T, End, tok::verbatim_block_end);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000431 T.setVerbatimBlockName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000432 State = LS_Normal;
433 return;
434 } else {
435 // There is some text, followed by end command. Extract text first.
436 NextLine = BufferPtr + Pos;
437 }
438
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000439 StringRef Text(BufferPtr, NextLine - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000440 formTokenWithChars(T, NextLine, tok::verbatim_block_line);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000441 T.setVerbatimBlockText(Text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000442
443 State = LS_VerbatimBlockBody;
444}
445
446void Lexer::lexVerbatimBlockBody(Token &T) {
447 assert(State == LS_VerbatimBlockBody);
448
449 if (CommentState == LCS_InsideCComment)
450 skipLineStartingDecorations();
451
452 lexVerbatimBlockFirstLine(T);
453}
454
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000455void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) {
456 const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
457 formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
458 T.setVerbatimLineName(Name);
459
460 State = LS_VerbatimLineText;
461}
462
463void Lexer::lexVerbatimLineText(Token &T) {
464 assert(State == LS_VerbatimLineText);
465
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000466 // Extract current line.
467 const char *Newline = findNewline(BufferPtr, CommentEnd);
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000468 const StringRef Text(BufferPtr, Newline - BufferPtr);
469 formTokenWithChars(T, Newline, tok::verbatim_line_text);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000470 T.setVerbatimLineText(Text);
Dmitri Gribenko962668d2012-06-27 16:53:58 +0000471
472 State = LS_Normal;
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000473}
474
475void Lexer::setupAndLexHTMLOpenTag(Token &T) {
476 assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
477 const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
478
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000479 StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000480 formTokenWithChars(T, TagNameEnd, tok::html_tag_open);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000481 T.setHTMLTagOpenName(Name);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000482
483 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
484
485 if (BufferPtr != CommentEnd && *BufferPtr == '>') {
486 BufferPtr++;
487 return;
488 }
489
490 if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
491 State = LS_HTMLOpenTag;
492}
493
494void Lexer::lexHTMLOpenTag(Token &T) {
495 assert(State == LS_HTMLOpenTag);
496
497 const char *TokenPtr = BufferPtr;
498 char C = *TokenPtr;
499 if (isHTMLIdentifierCharacter(C)) {
500 TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000501 StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000502 formTokenWithChars(T, TokenPtr, tok::html_ident);
Dmitri Gribenkof5e0aea2012-06-27 16:30:35 +0000503 T.setHTMLIdent(Ident);
Dmitri Gribenko2d44d772012-06-26 20:39:18 +0000504 } else {
505 switch (C) {
506 case '=':
507 TokenPtr++;
508 formTokenWithChars(T, TokenPtr, tok::html_equals);
509 break;
510 case '\"':
511 case '\'': {
512 const char *OpenQuote = TokenPtr;
513 TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
514 const char *ClosingQuote = TokenPtr;
515 if (TokenPtr != CommentEnd) // Skip closing quote.
516 TokenPtr++;
517 formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
518 T.setHTMLQuotedString(StringRef(OpenQuote + 1,
519 ClosingQuote - (OpenQuote + 1)));
520 break;
521 }
522 case '>':
523 TokenPtr++;
524 formTokenWithChars(T, TokenPtr, tok::html_greater);
525 break;
526 }
527 }
528
529 // Now look ahead and return to normal state if we don't see any HTML tokens
530 // ahead.
531 BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
532 if (BufferPtr == CommentEnd) {
533 State = LS_Normal;
534 return;
535 }
536
537 C = *BufferPtr;
538 if (!isHTMLIdentifierCharacter(C) &&
539 C != '=' && C != '\"' && C != '\'' && C != '>') {
540 State = LS_Normal;
541 return;
542 }
543}
544
545void Lexer::lexHTMLCloseTag(Token &T) {
546 assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
547
548 const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
549 const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
550
551 const char *End = skipWhitespace(TagNameEnd, CommentEnd);
552 if (End != CommentEnd && *End == '>')
553 End++;
554
555 formTokenWithChars(T, End, tok::html_tag_close);
556 T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
557}
558
559Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
560 const char *BufferStart, const char *BufferEnd):
561 BufferStart(BufferStart), BufferEnd(BufferEnd),
562 FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
563 CommentState(LCS_BeforeComment), State(LS_Normal) {
564}
565
566void Lexer::lex(Token &T) {
567again:
568 switch (CommentState) {
569 case LCS_BeforeComment:
570 if (BufferPtr == BufferEnd) {
571 formTokenWithChars(T, BufferPtr, tok::eof);
572 return;
573 }
574
575 assert(*BufferPtr == '/');
576 BufferPtr++; // Skip first slash.
577 switch(*BufferPtr) {
578 case '/': { // BCPL comment.
579 BufferPtr++; // Skip second slash.
580
581 if (BufferPtr != BufferEnd) {
582 // Skip Doxygen magic marker, if it is present.
583 // It might be missing because of a typo //< or /*<, or because we
584 // merged this non-Doxygen comment into a bunch of Doxygen comments
585 // around it: /** ... */ /* ... */ /** ... */
586 const char C = *BufferPtr;
587 if (C == '/' || C == '!')
588 BufferPtr++;
589 }
590
591 // Skip less-than symbol that marks trailing comments.
592 // Skip it even if the comment is not a Doxygen one, because //< and /*<
593 // are frequent typos.
594 if (BufferPtr != BufferEnd && *BufferPtr == '<')
595 BufferPtr++;
596
597 CommentState = LCS_InsideBCPLComment;
598 State = LS_Normal;
599 CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
600 goto again;
601 }
602 case '*': { // C comment.
603 BufferPtr++; // Skip star.
604
605 // Skip Doxygen magic marker.
606 const char C = *BufferPtr;
607 if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
608 BufferPtr++;
609
610 // Skip less-than symbol that marks trailing comments.
611 if (BufferPtr != BufferEnd && *BufferPtr == '<')
612 BufferPtr++;
613
614 CommentState = LCS_InsideCComment;
615 State = LS_Normal;
616 CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
617 goto again;
618 }
619 default:
620 llvm_unreachable("second character of comment should be '/' or '*'");
621 }
622
623 case LCS_BetweenComments: {
624 // Consecutive comments are extracted only if there is only whitespace
625 // between them. So we can search for the start of the next comment.
626 const char *EndWhitespace = BufferPtr;
627 while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
628 EndWhitespace++;
629
630 // Turn any whitespace between comments (and there is only whitespace
631 // between them) into a newline. We have two newlines between comments
632 // in total (first one was synthesized after a comment).
633 formTokenWithChars(T, EndWhitespace, tok::newline);
634
635 CommentState = LCS_BeforeComment;
636 break;
637 }
638
639 case LCS_InsideBCPLComment:
640 case LCS_InsideCComment:
641 if (BufferPtr != CommentEnd) {
642 lexCommentText(T);
643 break;
644 } else {
645 // Skip C comment closing sequence.
646 if (CommentState == LCS_InsideCComment) {
647 assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
648 BufferPtr += 2;
649 assert(BufferPtr <= BufferEnd);
650
651 // Synthenize newline just after the C comment, regardless if there is
652 // actually a newline.
653 formTokenWithChars(T, BufferPtr, tok::newline);
654
655 CommentState = LCS_BetweenComments;
656 break;
657 } else {
658 // Don't synthesized a newline after BCPL comment.
659 CommentState = LCS_BetweenComments;
660 goto again;
661 }
662 }
663 }
664}
665
666StringRef Lexer::getSpelling(const Token &Tok,
667 const SourceManager &SourceMgr,
668 bool *Invalid) const {
669 SourceLocation Loc = Tok.getLocation();
670 std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
671
672 bool InvalidTemp = false;
673 StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
674 if (InvalidTemp) {
675 *Invalid = true;
676 return StringRef();
677 }
678
679 const char *Begin = File.data() + LocInfo.second;
680 return StringRef(Begin, Tok.getLength());
681}
682
683void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) {
684 VerbatimBlockCommand VBC;
685 VBC.BeginName = BeginName;
686 VBC.EndName = EndName;
687 VerbatimBlockCommands.push_back(VBC);
688}
689
690void Lexer::addVerbatimLineCommand(StringRef Name) {
691 VerbatimLineCommand VLC;
692 VLC.Name = Name;
693 VerbatimLineCommands.push_back(VLC);
694}
695
696} // end namespace comments
697} // end namespace clang
698