blob: c7f8aa7e16a0a4f83eb8a8c9af95148e1289ca06 [file] [log] [blame]
//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "clang/AST/CommentParser.h"
Dmitri Gribenkoca7f80a2012-08-09 00:03:17 +000010#include "clang/AST/CommentCommandTraits.h"
Chandler Carruth3a022472012-12-04 09:13:33 +000011#include "clang/AST/CommentDiagnostic.h"
12#include "clang/AST/CommentSema.h"
Dmitri Gribenkobcef3412013-02-09 15:16:58 +000013#include "clang/Basic/CharInfo.h"
Dmitri Gribenkof26054f2012-07-11 21:38:39 +000014#include "clang/Basic/SourceManager.h"
Dmitri Gribenkoec925312012-07-06 00:28:32 +000015#include "llvm/Support/ErrorHandling.h"
16
17namespace clang {
Dmitri Gribenko1e50cbf2013-08-23 18:03:40 +000018
19static inline bool isWhitespace(llvm::StringRef S) {
20 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21 if (!isWhitespace(*I))
22 return false;
23 }
24 return true;
25}
26
Dmitri Gribenkoec925312012-07-06 00:28:32 +000027namespace comments {
28
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000029/// Re-lexes a sequence of tok::text tokens.
30class TextTokenRetokenizer {
31 llvm::BumpPtrAllocator &Allocator;
Dmitri Gribenko0a363022012-07-24 17:52:18 +000032 Parser &P;
Dmitri Gribenko35b0c092012-07-24 18:23:31 +000033
34 /// This flag is set when there are no more tokens we can fetch from lexer.
35 bool NoMoreInterestingTokens;
36
37 /// Token buffer: tokens we have processed and lookahead.
Dmitri Gribenko0a363022012-07-24 17:52:18 +000038 SmallVector<Token, 16> Toks;
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000039
Dmitri Gribenko35b0c092012-07-24 18:23:31 +000040 /// A position in \c Toks.
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000041 struct Position {
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000042 const char *BufferStart;
43 const char *BufferEnd;
44 const char *BufferPtr;
45 SourceLocation BufferStartLoc;
Alexander Shaposhnikovc9370262016-09-20 18:32:48 +000046 unsigned CurToken;
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000047 };
48
49 /// Current position in Toks.
50 Position Pos;
51
52 bool isEnd() const {
53 return Pos.CurToken >= Toks.size();
54 }
55
56 /// Sets up the buffer pointers to point to current token.
57 void setupBuffer() {
Dmitri Gribenko0a363022012-07-24 17:52:18 +000058 assert(!isEnd());
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000059 const Token &Tok = Toks[Pos.CurToken];
60
61 Pos.BufferStart = Tok.getText().begin();
62 Pos.BufferEnd = Tok.getText().end();
63 Pos.BufferPtr = Pos.BufferStart;
64 Pos.BufferStartLoc = Tok.getLocation();
65 }
66
67 SourceLocation getSourceLocation() const {
68 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70 }
71
72 char peek() const {
73 assert(!isEnd());
74 assert(Pos.BufferPtr != Pos.BufferEnd);
75 return *Pos.BufferPtr;
76 }
77
78 void consumeChar() {
79 assert(!isEnd());
80 assert(Pos.BufferPtr != Pos.BufferEnd);
81 Pos.BufferPtr++;
82 if (Pos.BufferPtr == Pos.BufferEnd) {
83 Pos.CurToken++;
Dmitri Gribenko35b0c092012-07-24 18:23:31 +000084 if (isEnd() && !addToken())
85 return;
86
87 assert(!isEnd());
88 setupBuffer();
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +000089 }
90 }
91
Dmitri Gribenko0a363022012-07-24 17:52:18 +000092 /// Add a token.
93 /// Returns true on success, false if there are no interesting tokens to
94 /// fetch from lexer.
95 bool addToken() {
Dmitri Gribenko35b0c092012-07-24 18:23:31 +000096 if (NoMoreInterestingTokens)
Dmitri Gribenko0a363022012-07-24 17:52:18 +000097 return false;
98
Dmitri Gribenko35b0c092012-07-24 18:23:31 +000099 if (P.Tok.is(tok::newline)) {
100 // If we see a single newline token between text tokens, skip it.
101 Token Newline = P.Tok;
102 P.consumeToken();
103 if (P.Tok.isNot(tok::text)) {
104 P.putBack(Newline);
105 NoMoreInterestingTokens = true;
106 return false;
107 }
108 }
109 if (P.Tok.isNot(tok::text)) {
110 NoMoreInterestingTokens = true;
111 return false;
112 }
113
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000114 Toks.push_back(P.Tok);
115 P.consumeToken();
116 if (Toks.size() == 1)
117 setupBuffer();
118 return true;
119 }
120
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000121 void consumeWhitespace() {
122 while (!isEnd()) {
123 if (isWhitespace(peek()))
124 consumeChar();
125 else
126 break;
127 }
128 }
129
130 void formTokenWithChars(Token &Result,
131 SourceLocation Loc,
132 const char *TokBegin,
133 unsigned TokLength,
134 StringRef Text) {
135 Result.setLocation(Loc);
136 Result.setKind(tok::text);
137 Result.setLength(TokLength);
138#ifndef NDEBUG
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000139 Result.TextPtr = "<UNSET>";
140 Result.IntVal = 7;
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000141#endif
142 Result.setText(Text);
143 }
144
145public:
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000146 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
Dmitri Gribenko35b0c092012-07-24 18:23:31 +0000147 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000148 Pos.CurToken = 0;
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000149 addToken();
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000150 }
151
152 /// Extract a word -- sequence of non-whitespace characters.
153 bool lexWord(Token &Tok) {
154 if (isEnd())
155 return false;
156
157 Position SavedPos = Pos;
158
159 consumeWhitespace();
160 SmallString<32> WordText;
161 const char *WordBegin = Pos.BufferPtr;
162 SourceLocation Loc = getSourceLocation();
163 while (!isEnd()) {
164 const char C = peek();
165 if (!isWhitespace(C)) {
166 WordText.push_back(C);
167 consumeChar();
168 } else
169 break;
170 }
171 const unsigned Length = WordText.size();
172 if (Length == 0) {
173 Pos = SavedPos;
174 return false;
175 }
176
177 char *TextPtr = Allocator.Allocate<char>(Length + 1);
178
179 memcpy(TextPtr, WordText.c_str(), Length + 1);
180 StringRef Text = StringRef(TextPtr, Length);
181
Dmitri Gribenkoc55c6fc2012-12-19 17:34:55 +0000182 formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000183 return true;
184 }
185
186 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
187 if (isEnd())
188 return false;
189
190 Position SavedPos = Pos;
191
192 consumeWhitespace();
193 SmallString<32> WordText;
194 const char *WordBegin = Pos.BufferPtr;
195 SourceLocation Loc = getSourceLocation();
196 bool Error = false;
197 if (!isEnd()) {
198 const char C = peek();
199 if (C == OpenDelim) {
200 WordText.push_back(C);
201 consumeChar();
202 } else
203 Error = true;
204 }
205 char C = '\0';
206 while (!Error && !isEnd()) {
207 C = peek();
208 WordText.push_back(C);
209 consumeChar();
210 if (C == CloseDelim)
211 break;
212 }
213 if (!Error && C != CloseDelim)
214 Error = true;
215
216 if (Error) {
217 Pos = SavedPos;
218 return false;
219 }
220
221 const unsigned Length = WordText.size();
222 char *TextPtr = Allocator.Allocate<char>(Length + 1);
223
224 memcpy(TextPtr, WordText.c_str(), Length + 1);
225 StringRef Text = StringRef(TextPtr, Length);
226
227 formTokenWithChars(Tok, Loc, WordBegin,
228 Pos.BufferPtr - WordBegin, Text);
229 return true;
230 }
231
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000232 /// Put back tokens that we didn't consume.
233 void putBackLeftoverTokens() {
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000234 if (isEnd())
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000235 return;
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000236
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000237 bool HavePartialTok = false;
238 Token PartialTok;
239 if (Pos.BufferPtr != Pos.BufferStart) {
240 formTokenWithChars(PartialTok, getSourceLocation(),
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000241 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242 StringRef(Pos.BufferPtr,
243 Pos.BufferEnd - Pos.BufferPtr));
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000244 HavePartialTok = true;
245 Pos.CurToken++;
246 }
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000247
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000248 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249 Pos.CurToken = Toks.size();
250
251 if (HavePartialTok)
252 P.putBack(PartialTok);
Dmitri Gribenko1bfd9da2012-07-24 17:43:18 +0000253 }
254};
255
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000256Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
Dmitri Gribenkoca7f80a2012-08-09 00:03:17 +0000257 const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
258 const CommandTraits &Traits):
259 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
260 Traits(Traits) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000261 consumeToken();
262}
263
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000264void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265 TextTokenRetokenizer &Retokenizer) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000266 Token Arg;
267 // Check if argument looks like direction specification: [dir]
268 // e.g., [in], [out], [in,out]
269 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000270 S.actOnParamCommandDirectionArg(PC,
271 Arg.getLocation(),
272 Arg.getEndLocation(),
273 Arg.getText());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000274
275 if (Retokenizer.lexWord(Arg))
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000276 S.actOnParamCommandParamNameArg(PC,
277 Arg.getLocation(),
278 Arg.getEndLocation(),
279 Arg.getText());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000280}
281
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000282void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283 TextTokenRetokenizer &Retokenizer) {
Dmitri Gribenko34df2202012-07-31 22:37:06 +0000284 Token Arg;
285 if (Retokenizer.lexWord(Arg))
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000286 S.actOnTParamCommandParamNameArg(TPC,
287 Arg.getLocation(),
288 Arg.getEndLocation(),
289 Arg.getText());
Dmitri Gribenko34df2202012-07-31 22:37:06 +0000290}
291
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000292void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
293 TextTokenRetokenizer &Retokenizer,
294 unsigned NumArgs) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000295 typedef BlockCommandComment::Argument Argument;
Dmitri Gribenkobacb9f62012-07-06 16:41:59 +0000296 Argument *Args =
297 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000298 unsigned ParsedArgs = 0;
299 Token Arg;
300 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
301 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
302 Arg.getEndLocation()),
303 Arg.getText());
304 ParsedArgs++;
305 }
306
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000307 S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000308}
309
310BlockCommandComment *Parser::parseBlockCommand() {
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000311 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000312
Craig Topper36250ad2014-05-12 05:36:57 +0000313 ParamCommandComment *PC = nullptr;
314 TParamCommandComment *TPC = nullptr;
315 BlockCommandComment *BC = nullptr;
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000316 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
Dmitri Gribenkobcf7f4d2013-03-04 23:06:15 +0000317 CommandMarkerKind CommandMarker =
318 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000319 if (Info->IsParamCommand) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000320 PC = S.actOnParamCommandStart(Tok.getLocation(),
321 Tok.getEndLocation(),
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000322 Tok.getCommandID(),
Dmitri Gribenkobcf7f4d2013-03-04 23:06:15 +0000323 CommandMarker);
Dmitri Gribenko696d7222012-12-19 17:17:09 +0000324 } else if (Info->IsTParamCommand) {
Dmitri Gribenko34df2202012-07-31 22:37:06 +0000325 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
326 Tok.getEndLocation(),
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000327 Tok.getCommandID(),
Dmitri Gribenkobcf7f4d2013-03-04 23:06:15 +0000328 CommandMarker);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000329 } else {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000330 BC = S.actOnBlockCommandStart(Tok.getLocation(),
331 Tok.getEndLocation(),
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000332 Tok.getCommandID(),
Dmitri Gribenkobcf7f4d2013-03-04 23:06:15 +0000333 CommandMarker);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000334 }
335 consumeToken();
336
Dmitri Gribenko71469302013-01-26 00:36:14 +0000337 if (isTokBlockCommand()) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000338 // Block command ahead. We can't nest block commands, so pretend that this
339 // command has an empty argument.
Dmitri Gribenko44ebbd52013-05-05 00:41:58 +0000340 ParagraphComment *Paragraph = S.actOnParagraphComment(None);
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000341 if (PC) {
Dmitri Gribenko6297fa82012-08-06 23:48:44 +0000342 S.actOnParamCommandFinish(PC, Paragraph);
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000343 return PC;
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000344 } else if (TPC) {
Dmitri Gribenko6297fa82012-08-06 23:48:44 +0000345 S.actOnTParamCommandFinish(TPC, Paragraph);
346 return TPC;
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000347 } else {
348 S.actOnBlockCommandFinish(BC, Paragraph);
349 return BC;
350 }
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000351 }
352
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000353 if (PC || TPC || Info->NumArgs > 0) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000354 // In order to parse command arguments we need to retokenize a few
355 // following text tokens.
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000356 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000357
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000358 if (PC)
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000359 parseParamCommandArgs(PC, Retokenizer);
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000360 else if (TPC)
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000361 parseTParamCommandArgs(TPC, Retokenizer);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000362 else
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000363 parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000364
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000365 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000366 }
367
Dmitri Gribenko71469302013-01-26 00:36:14 +0000368 // If there's a block command ahead, we will attach an empty paragraph to
369 // this command.
370 bool EmptyParagraph = false;
371 if (isTokBlockCommand())
372 EmptyParagraph = true;
373 else if (Tok.is(tok::newline)) {
374 Token PrevTok = Tok;
375 consumeToken();
376 EmptyParagraph = isTokBlockCommand();
377 putBack(PrevTok);
378 }
379
380 ParagraphComment *Paragraph;
381 if (EmptyParagraph)
Dmitri Gribenko44ebbd52013-05-05 00:41:58 +0000382 Paragraph = S.actOnParagraphComment(None);
Dmitri Gribenko71469302013-01-26 00:36:14 +0000383 else {
384 BlockContentComment *Block = parseParagraphOrBlockCommand();
385 // Since we have checked for a block command, we should have parsed a
386 // paragraph.
387 Paragraph = cast<ParagraphComment>(Block);
388 }
389
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000390 if (PC) {
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000391 S.actOnParamCommandFinish(PC, Paragraph);
392 return PC;
Dmitri Gribenkoc2c804d2013-04-18 20:50:35 +0000393 } else if (TPC) {
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000394 S.actOnTParamCommandFinish(TPC, Paragraph);
395 return TPC;
396 } else {
397 S.actOnBlockCommandFinish(BC, Paragraph);
398 return BC;
399 }
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000400}
401
402InlineCommandComment *Parser::parseInlineCommand() {
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000403 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000404
405 const Token CommandTok = Tok;
406 consumeToken();
407
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000408 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000409
410 Token ArgTok;
411 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
412
413 InlineCommandComment *IC;
414 if (ArgTokValid) {
415 IC = S.actOnInlineCommand(CommandTok.getLocation(),
416 CommandTok.getEndLocation(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000417 CommandTok.getCommandID(),
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000418 ArgTok.getLocation(),
419 ArgTok.getEndLocation(),
420 ArgTok.getText());
421 } else {
422 IC = S.actOnInlineCommand(CommandTok.getLocation(),
423 CommandTok.getEndLocation(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000424 CommandTok.getCommandID());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000425 }
426
Dmitri Gribenko0a363022012-07-24 17:52:18 +0000427 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000428
429 return IC;
430}
431
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000432HTMLStartTagComment *Parser::parseHTMLStartTag() {
433 assert(Tok.is(tok::html_start_tag));
434 HTMLStartTagComment *HST =
435 S.actOnHTMLStartTagStart(Tok.getLocation(),
436 Tok.getHTMLTagStartName());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000437 consumeToken();
438
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000439 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000440 while (true) {
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000441 switch (Tok.getKind()) {
442 case tok::html_ident: {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000443 Token Ident = Tok;
444 consumeToken();
445 if (Tok.isNot(tok::html_equals)) {
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000446 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
447 Ident.getHTMLIdent()));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000448 continue;
449 }
450 Token Equals = Tok;
451 consumeToken();
452 if (Tok.isNot(tok::html_quoted_string)) {
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000453 Diag(Tok.getLocation(),
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000454 diag::warn_doc_html_start_tag_expected_quoted_string)
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000455 << SourceRange(Equals.getLocation());
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000456 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
457 Ident.getHTMLIdent()));
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000458 while (Tok.is(tok::html_equals) ||
459 Tok.is(tok::html_quoted_string))
460 consumeToken();
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000461 continue;
462 }
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000463 Attrs.push_back(HTMLStartTagComment::Attribute(
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000464 Ident.getLocation(),
465 Ident.getHTMLIdent(),
466 Equals.getLocation(),
467 SourceRange(Tok.getLocation(),
468 Tok.getEndLocation()),
469 Tok.getHTMLQuotedString()));
470 consumeToken();
471 continue;
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000472 }
473
474 case tok::html_greater:
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000475 S.actOnHTMLStartTagFinish(HST,
476 S.copyArray(llvm::makeArrayRef(Attrs)),
477 Tok.getLocation(),
478 /* IsSelfClosing = */ false);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000479 consumeToken();
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000480 return HST;
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000481
482 case tok::html_slash_greater:
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000483 S.actOnHTMLStartTagFinish(HST,
484 S.copyArray(llvm::makeArrayRef(Attrs)),
485 Tok.getLocation(),
486 /* IsSelfClosing = */ true);
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000487 consumeToken();
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000488 return HST;
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000489
490 case tok::html_equals:
491 case tok::html_quoted_string:
492 Diag(Tok.getLocation(),
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000493 diag::warn_doc_html_start_tag_expected_ident_or_greater);
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000494 while (Tok.is(tok::html_equals) ||
495 Tok.is(tok::html_quoted_string))
496 consumeToken();
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000497 if (Tok.is(tok::html_ident) ||
498 Tok.is(tok::html_greater) ||
499 Tok.is(tok::html_slash_greater))
500 continue;
501
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000502 S.actOnHTMLStartTagFinish(HST,
503 S.copyArray(llvm::makeArrayRef(Attrs)),
504 SourceLocation(),
505 /* IsSelfClosing = */ false);
506 return HST;
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000507
508 default:
509 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000510 S.actOnHTMLStartTagFinish(HST,
511 S.copyArray(llvm::makeArrayRef(Attrs)),
512 SourceLocation(),
513 /* IsSelfClosing = */ false);
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000514 bool StartLineInvalid;
515 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000516 HST->getLocation(),
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000517 &StartLineInvalid);
518 bool EndLineInvalid;
519 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
520 Tok.getLocation(),
521 &EndLineInvalid);
522 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
523 Diag(Tok.getLocation(),
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000524 diag::warn_doc_html_start_tag_expected_ident_or_greater)
525 << HST->getSourceRange();
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000526 else {
527 Diag(Tok.getLocation(),
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000528 diag::warn_doc_html_start_tag_expected_ident_or_greater);
529 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
530 << HST->getSourceRange();
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000531 }
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000532 return HST;
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000533 }
534 }
535}
536
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000537HTMLEndTagComment *Parser::parseHTMLEndTag() {
538 assert(Tok.is(tok::html_end_tag));
539 Token TokEndTag = Tok;
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000540 consumeToken();
541 SourceLocation Loc;
542 if (Tok.is(tok::html_greater)) {
543 Loc = Tok.getLocation();
544 consumeToken();
545 }
546
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000547 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
548 Loc,
549 TokEndTag.getHTMLTagEndName());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000550}
551
552BlockContentComment *Parser::parseParagraphOrBlockCommand() {
553 SmallVector<InlineContentComment *, 8> Content;
554
555 while (true) {
556 switch (Tok.getKind()) {
557 case tok::verbatim_block_begin:
558 case tok::verbatim_line_name:
559 case tok::eof:
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000560 break; // Block content or EOF ahead, finish this parapgaph.
561
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000562 case tok::unknown_command:
563 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
564 Tok.getEndLocation(),
565 Tok.getUnknownCommandName()));
566 consumeToken();
567 continue;
568
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000569 case tok::backslash_command:
570 case tok::at_command: {
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000571 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
572 if (Info->IsBlockCommand) {
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000573 if (Content.size() == 0)
574 return parseBlockCommand();
575 break; // Block command ahead, finish this parapgaph.
576 }
Dmitri Gribenko76b91c32012-11-18 00:30:31 +0000577 if (Info->IsVerbatimBlockEndCommand) {
578 Diag(Tok.getLocation(),
579 diag::warn_verbatim_block_end_without_start)
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000580 << Tok.is(tok::at_command)
Dmitri Gribenko76b91c32012-11-18 00:30:31 +0000581 << Info->Name
582 << SourceRange(Tok.getLocation(), Tok.getEndLocation());
583 consumeToken();
584 continue;
585 }
Dmitri Gribenko9304d862012-09-11 19:22:03 +0000586 if (Info->IsUnknownCommand) {
587 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
588 Tok.getEndLocation(),
589 Info->getID()));
590 consumeToken();
591 continue;
592 }
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000593 assert(Info->IsInlineCommand);
594 Content.push_back(parseInlineCommand());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000595 continue;
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000596 }
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000597
598 case tok::newline: {
599 consumeToken();
600 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
601 consumeToken();
602 break; // Two newlines -- end of paragraph.
603 }
Dmitri Gribenko1e50cbf2013-08-23 18:03:40 +0000604 // Also allow [tok::newline, tok::text, tok::newline] if the middle
605 // tok::text is just whitespace.
606 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
607 Token WhitespaceTok = Tok;
608 consumeToken();
609 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
610 consumeToken();
611 break;
612 }
613 // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
614 putBack(WhitespaceTok);
615 }
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000616 if (Content.size() > 0)
617 Content.back()->addTrailingNewline();
618 continue;
619 }
620
621 // Don't deal with HTML tag soup now.
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000622 case tok::html_start_tag:
623 Content.push_back(parseHTMLStartTag());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000624 continue;
625
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000626 case tok::html_end_tag:
627 Content.push_back(parseHTMLEndTag());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000628 continue;
629
630 case tok::text:
631 Content.push_back(S.actOnText(Tok.getLocation(),
632 Tok.getEndLocation(),
633 Tok.getText()));
634 consumeToken();
635 continue;
636
637 case tok::verbatim_block_line:
638 case tok::verbatim_block_end:
639 case tok::verbatim_line_text:
640 case tok::html_ident:
641 case tok::html_equals:
642 case tok::html_quoted_string:
643 case tok::html_greater:
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000644 case tok::html_slash_greater:
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000645 llvm_unreachable("should not see this token");
646 }
647 break;
648 }
649
Dmitri Gribenko34df2202012-07-31 22:37:06 +0000650 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000651}
652
653VerbatimBlockComment *Parser::parseVerbatimBlock() {
654 assert(Tok.is(tok::verbatim_block_begin));
655
656 VerbatimBlockComment *VB =
657 S.actOnVerbatimBlockStart(Tok.getLocation(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000658 Tok.getVerbatimBlockID());
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000659 consumeToken();
660
661 // Don't create an empty line if verbatim opening command is followed
662 // by a newline.
663 if (Tok.is(tok::newline))
664 consumeToken();
665
666 SmallVector<VerbatimBlockLineComment *, 8> Lines;
667 while (Tok.is(tok::verbatim_block_line) ||
668 Tok.is(tok::newline)) {
669 VerbatimBlockLineComment *Line;
670 if (Tok.is(tok::verbatim_block_line)) {
671 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
672 Tok.getVerbatimBlockText());
673 consumeToken();
674 if (Tok.is(tok::newline)) {
675 consumeToken();
676 }
677 } else {
678 // Empty line, just a tok::newline.
Dmitri Gribenkob03cc7e2012-07-18 21:27:38 +0000679 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000680 consumeToken();
681 }
682 Lines.push_back(Line);
683 }
684
Dmitri Gribenko66a00c72012-07-20 20:18:53 +0000685 if (Tok.is(tok::verbatim_block_end)) {
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000686 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000687 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000688 Info->Name,
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000689 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko66a00c72012-07-20 20:18:53 +0000690 consumeToken();
691 } else {
692 // Unterminated \\verbatim block
Dmitri Gribenkoa9770ad2012-08-06 19:03:12 +0000693 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
694 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko66a00c72012-07-20 20:18:53 +0000695 }
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000696
697 return VB;
698}
699
700VerbatimLineComment *Parser::parseVerbatimLine() {
701 assert(Tok.is(tok::verbatim_line_name));
702
703 Token NameTok = Tok;
704 consumeToken();
705
706 SourceLocation TextBegin;
707 StringRef Text;
708 // Next token might not be a tok::verbatim_line_text if verbatim line
709 // starting command comes just before a newline or comment end.
710 if (Tok.is(tok::verbatim_line_text)) {
711 TextBegin = Tok.getLocation();
712 Text = Tok.getVerbatimLineText();
713 } else {
714 TextBegin = NameTok.getEndLocation();
715 Text = "";
716 }
717
718 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000719 NameTok.getVerbatimLineID(),
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000720 TextBegin,
721 Text);
722 consumeToken();
723 return VL;
724}
725
726BlockContentComment *Parser::parseBlockContent() {
727 switch (Tok.getKind()) {
728 case tok::text:
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000729 case tok::unknown_command:
Fariborz Jahaniane400cb72013-03-02 02:39:57 +0000730 case tok::backslash_command:
731 case tok::at_command:
Dmitri Gribenkoe00ffc72012-07-13 00:44:24 +0000732 case tok::html_start_tag:
733 case tok::html_end_tag:
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000734 return parseParagraphOrBlockCommand();
735
736 case tok::verbatim_block_begin:
737 return parseVerbatimBlock();
738
739 case tok::verbatim_line_name:
740 return parseVerbatimLine();
741
742 case tok::eof:
743 case tok::newline:
744 case tok::verbatim_block_line:
745 case tok::verbatim_block_end:
746 case tok::verbatim_line_text:
747 case tok::html_ident:
748 case tok::html_equals:
749 case tok::html_quoted_string:
750 case tok::html_greater:
Dmitri Gribenkof26054f2012-07-11 21:38:39 +0000751 case tok::html_slash_greater:
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000752 llvm_unreachable("should not see this token");
753 }
Matt Beaumont-Gay4106ea32012-07-06 21:13:09 +0000754 llvm_unreachable("bogus token kind");
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000755}
756
757FullComment *Parser::parseFullComment() {
758 // Skip newlines at the beginning of the comment.
759 while (Tok.is(tok::newline))
760 consumeToken();
761
762 SmallVector<BlockContentComment *, 8> Blocks;
763 while (Tok.isNot(tok::eof)) {
764 Blocks.push_back(parseBlockContent());
765
766 // Skip extra newlines after paragraph end.
767 while (Tok.is(tok::newline))
768 consumeToken();
769 }
Dmitri Gribenko34df2202012-07-31 22:37:06 +0000770 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
Dmitri Gribenkoec925312012-07-06 00:28:32 +0000771}
772
773} // end namespace comments
774} // end namespace clang