blob: 63560e197dd8cdd2c21fb31f3e8e01b6acd4e008 [file] [log] [blame]
//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "clang/AST/CommentParser.h"
11#include "clang/AST/CommentSema.h"
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +000012#include "clang/AST/CommentDiagnostic.h"
13#include "clang/Basic/SourceManager.h"
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +000014#include "llvm/Support/ErrorHandling.h"
15
16namespace clang {
17namespace comments {
18
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000019/// Re-lexes a sequence of tok::text tokens.
20class TextTokenRetokenizer {
21 llvm::BumpPtrAllocator &Allocator;
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000022 Parser &P;
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000023
24 /// This flag is set when there are no more tokens we can fetch from lexer.
25 bool NoMoreInterestingTokens;
26
27 /// Token buffer: tokens we have processed and lookahead.
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000028 SmallVector<Token, 16> Toks;
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000029
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000030 /// A position in \c Toks.
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000031 struct Position {
32 unsigned CurToken;
33 const char *BufferStart;
34 const char *BufferEnd;
35 const char *BufferPtr;
36 SourceLocation BufferStartLoc;
37 };
38
39 /// Current position in Toks.
40 Position Pos;
41
42 bool isEnd() const {
43 return Pos.CurToken >= Toks.size();
44 }
45
46 /// Sets up the buffer pointers to point to current token.
47 void setupBuffer() {
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000048 assert(!isEnd());
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000049 const Token &Tok = Toks[Pos.CurToken];
50
51 Pos.BufferStart = Tok.getText().begin();
52 Pos.BufferEnd = Tok.getText().end();
53 Pos.BufferPtr = Pos.BufferStart;
54 Pos.BufferStartLoc = Tok.getLocation();
55 }
56
57 SourceLocation getSourceLocation() const {
58 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
59 return Pos.BufferStartLoc.getLocWithOffset(CharNo);
60 }
61
62 char peek() const {
63 assert(!isEnd());
64 assert(Pos.BufferPtr != Pos.BufferEnd);
65 return *Pos.BufferPtr;
66 }
67
68 void consumeChar() {
69 assert(!isEnd());
70 assert(Pos.BufferPtr != Pos.BufferEnd);
71 Pos.BufferPtr++;
72 if (Pos.BufferPtr == Pos.BufferEnd) {
73 Pos.CurToken++;
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000074 if (isEnd() && !addToken())
75 return;
76
77 assert(!isEnd());
78 setupBuffer();
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +000079 }
80 }
81
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000082 /// Add a token.
83 /// Returns true on success, false if there are no interesting tokens to
84 /// fetch from lexer.
85 bool addToken() {
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000086 if (NoMoreInterestingTokens)
Dmitri Gribenkodb13f042012-07-24 17:52:18 +000087 return false;
88
Dmitri Gribenko0c43a922012-07-24 18:23:31 +000089 if (P.Tok.is(tok::newline)) {
90 // If we see a single newline token between text tokens, skip it.
91 Token Newline = P.Tok;
92 P.consumeToken();
93 if (P.Tok.isNot(tok::text)) {
94 P.putBack(Newline);
95 NoMoreInterestingTokens = true;
96 return false;
97 }
98 }
99 if (P.Tok.isNot(tok::text)) {
100 NoMoreInterestingTokens = true;
101 return false;
102 }
103
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000104 Toks.push_back(P.Tok);
105 P.consumeToken();
106 if (Toks.size() == 1)
107 setupBuffer();
108 return true;
109 }
110
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000111 static bool isWhitespace(char C) {
112 return C == ' ' || C == '\n' || C == '\r' ||
113 C == '\t' || C == '\f' || C == '\v';
114 }
115
116 void consumeWhitespace() {
117 while (!isEnd()) {
118 if (isWhitespace(peek()))
119 consumeChar();
120 else
121 break;
122 }
123 }
124
125 void formTokenWithChars(Token &Result,
126 SourceLocation Loc,
127 const char *TokBegin,
128 unsigned TokLength,
129 StringRef Text) {
130 Result.setLocation(Loc);
131 Result.setKind(tok::text);
132 Result.setLength(TokLength);
133#ifndef NDEBUG
134 Result.TextPtr1 = "<UNSET>";
135 Result.TextLen1 = 7;
136#endif
137 Result.setText(Text);
138 }
139
140public:
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000141 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
Dmitri Gribenko0c43a922012-07-24 18:23:31 +0000142 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000143 Pos.CurToken = 0;
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000144 addToken();
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000145 }
146
147 /// Extract a word -- sequence of non-whitespace characters.
148 bool lexWord(Token &Tok) {
149 if (isEnd())
150 return false;
151
152 Position SavedPos = Pos;
153
154 consumeWhitespace();
155 SmallString<32> WordText;
156 const char *WordBegin = Pos.BufferPtr;
157 SourceLocation Loc = getSourceLocation();
158 while (!isEnd()) {
159 const char C = peek();
160 if (!isWhitespace(C)) {
161 WordText.push_back(C);
162 consumeChar();
163 } else
164 break;
165 }
166 const unsigned Length = WordText.size();
167 if (Length == 0) {
168 Pos = SavedPos;
169 return false;
170 }
171
172 char *TextPtr = Allocator.Allocate<char>(Length + 1);
173
174 memcpy(TextPtr, WordText.c_str(), Length + 1);
175 StringRef Text = StringRef(TextPtr, Length);
176
177 formTokenWithChars(Tok, Loc, WordBegin,
178 Pos.BufferPtr - WordBegin, Text);
179 return true;
180 }
181
182 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
183 if (isEnd())
184 return false;
185
186 Position SavedPos = Pos;
187
188 consumeWhitespace();
189 SmallString<32> WordText;
190 const char *WordBegin = Pos.BufferPtr;
191 SourceLocation Loc = getSourceLocation();
192 bool Error = false;
193 if (!isEnd()) {
194 const char C = peek();
195 if (C == OpenDelim) {
196 WordText.push_back(C);
197 consumeChar();
198 } else
199 Error = true;
200 }
201 char C = '\0';
202 while (!Error && !isEnd()) {
203 C = peek();
204 WordText.push_back(C);
205 consumeChar();
206 if (C == CloseDelim)
207 break;
208 }
209 if (!Error && C != CloseDelim)
210 Error = true;
211
212 if (Error) {
213 Pos = SavedPos;
214 return false;
215 }
216
217 const unsigned Length = WordText.size();
218 char *TextPtr = Allocator.Allocate<char>(Length + 1);
219
220 memcpy(TextPtr, WordText.c_str(), Length + 1);
221 StringRef Text = StringRef(TextPtr, Length);
222
223 formTokenWithChars(Tok, Loc, WordBegin,
224 Pos.BufferPtr - WordBegin, Text);
225 return true;
226 }
227
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000228 /// Put back tokens that we didn't consume.
229 void putBackLeftoverTokens() {
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000230 if (isEnd())
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000231 return;
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000232
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000233 bool HavePartialTok = false;
234 Token PartialTok;
235 if (Pos.BufferPtr != Pos.BufferStart) {
236 formTokenWithChars(PartialTok, getSourceLocation(),
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000237 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
238 StringRef(Pos.BufferPtr,
239 Pos.BufferEnd - Pos.BufferPtr));
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000240 HavePartialTok = true;
241 Pos.CurToken++;
242 }
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000243
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000244 P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
245 Pos.CurToken = Toks.size();
246
247 if (HavePartialTok)
248 P.putBack(PartialTok);
Dmitri Gribenkoc4b0f9b2012-07-24 17:43:18 +0000249 }
250};
251
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000252Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
253 const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
254 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000255 consumeToken();
256}
257
258ParamCommandComment *Parser::parseParamCommandArgs(
259 ParamCommandComment *PC,
260 TextTokenRetokenizer &Retokenizer) {
261 Token Arg;
262 // Check if argument looks like direction specification: [dir]
263 // e.g., [in], [out], [in,out]
264 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000265 PC = S.actOnParamCommandDirectionArg(PC,
266 Arg.getLocation(),
267 Arg.getEndLocation(),
268 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000269
270 if (Retokenizer.lexWord(Arg))
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000271 PC = S.actOnParamCommandParamNameArg(PC,
272 Arg.getLocation(),
273 Arg.getEndLocation(),
274 Arg.getText());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000275
276 return PC;
277}
278
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000279TParamCommandComment *Parser::parseTParamCommandArgs(
280 TParamCommandComment *TPC,
281 TextTokenRetokenizer &Retokenizer) {
282 Token Arg;
283 if (Retokenizer.lexWord(Arg))
284 TPC = S.actOnTParamCommandParamNameArg(TPC,
285 Arg.getLocation(),
286 Arg.getEndLocation(),
287 Arg.getText());
288
289 return TPC;
290}
291
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000292BlockCommandComment *Parser::parseBlockCommandArgs(
293 BlockCommandComment *BC,
294 TextTokenRetokenizer &Retokenizer,
295 unsigned NumArgs) {
296 typedef BlockCommandComment::Argument Argument;
Dmitri Gribenko814e2192012-07-06 16:41:59 +0000297 Argument *Args =
298 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000299 unsigned ParsedArgs = 0;
300 Token Arg;
301 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
302 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
303 Arg.getEndLocation()),
304 Arg.getText());
305 ParsedArgs++;
306 }
307
308 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
309}
310
311BlockCommandComment *Parser::parseBlockCommand() {
312 assert(Tok.is(tok::command));
313
314 ParamCommandComment *PC;
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000315 TParamCommandComment *TPC;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000316 BlockCommandComment *BC;
317 bool IsParam = false;
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000318 bool IsTParam = false;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000319 unsigned NumArgs = 0;
320 if (S.isParamCommand(Tok.getCommandName())) {
321 IsParam = true;
322 PC = S.actOnParamCommandStart(Tok.getLocation(),
323 Tok.getEndLocation(),
324 Tok.getCommandName());
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000325 } if (S.isTParamCommand(Tok.getCommandName())) {
326 IsTParam = true;
327 TPC = S.actOnTParamCommandStart(Tok.getLocation(),
328 Tok.getEndLocation(),
329 Tok.getCommandName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000330 } else {
331 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
332 BC = S.actOnBlockCommandStart(Tok.getLocation(),
333 Tok.getEndLocation(),
334 Tok.getCommandName());
335 }
336 consumeToken();
337
338 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
339 // Block command ahead. We can't nest block commands, so pretend that this
340 // command has an empty argument.
Dmitri Gribenkoe5deb792012-07-30 18:05:28 +0000341 ParagraphComment *Paragraph = S.actOnParagraphComment(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000342 ArrayRef<InlineContentComment *>());
Dmitri Gribenkoe5deb792012-07-30 18:05:28 +0000343 return S.actOnBlockCommandFinish(IsParam ? PC : BC, Paragraph);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000344 }
345
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000346 if (IsParam || IsTParam || NumArgs > 0) {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000347 // In order to parse command arguments we need to retokenize a few
348 // following text tokens.
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000349 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000350
351 if (IsParam)
352 PC = parseParamCommandArgs(PC, Retokenizer);
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000353 else if (IsTParam)
354 TPC = parseTParamCommandArgs(TPC, Retokenizer);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000355 else
356 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
357
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000358 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000359 }
360
361 BlockContentComment *Block = parseParagraphOrBlockCommand();
362 // Since we have checked for a block command, we should have parsed a
363 // paragraph.
364 if (IsParam)
365 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000366 else if (IsTParam)
367 return S.actOnTParamCommandFinish(TPC, cast<ParagraphComment>(Block));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000368 else
369 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
370}
371
372InlineCommandComment *Parser::parseInlineCommand() {
373 assert(Tok.is(tok::command));
374
375 const Token CommandTok = Tok;
376 consumeToken();
377
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000378 TextTokenRetokenizer Retokenizer(Allocator, *this);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000379
380 Token ArgTok;
381 bool ArgTokValid = Retokenizer.lexWord(ArgTok);
382
383 InlineCommandComment *IC;
384 if (ArgTokValid) {
385 IC = S.actOnInlineCommand(CommandTok.getLocation(),
386 CommandTok.getEndLocation(),
387 CommandTok.getCommandName(),
388 ArgTok.getLocation(),
389 ArgTok.getEndLocation(),
390 ArgTok.getText());
391 } else {
392 IC = S.actOnInlineCommand(CommandTok.getLocation(),
393 CommandTok.getEndLocation(),
394 CommandTok.getCommandName());
395 }
396
Dmitri Gribenkodb13f042012-07-24 17:52:18 +0000397 Retokenizer.putBackLeftoverTokens();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000398
399 return IC;
400}
401
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000402HTMLStartTagComment *Parser::parseHTMLStartTag() {
403 assert(Tok.is(tok::html_start_tag));
404 HTMLStartTagComment *HST =
405 S.actOnHTMLStartTagStart(Tok.getLocation(),
406 Tok.getHTMLTagStartName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000407 consumeToken();
408
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000409 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000410 while (true) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000411 switch (Tok.getKind()) {
412 case tok::html_ident: {
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000413 Token Ident = Tok;
414 consumeToken();
415 if (Tok.isNot(tok::html_equals)) {
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000416 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
417 Ident.getHTMLIdent()));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000418 continue;
419 }
420 Token Equals = Tok;
421 consumeToken();
422 if (Tok.isNot(tok::html_quoted_string)) {
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000423 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000424 diag::warn_doc_html_start_tag_expected_quoted_string)
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000425 << SourceRange(Equals.getLocation());
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000426 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
427 Ident.getHTMLIdent()));
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000428 while (Tok.is(tok::html_equals) ||
429 Tok.is(tok::html_quoted_string))
430 consumeToken();
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000431 continue;
432 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000433 Attrs.push_back(HTMLStartTagComment::Attribute(
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000434 Ident.getLocation(),
435 Ident.getHTMLIdent(),
436 Equals.getLocation(),
437 SourceRange(Tok.getLocation(),
438 Tok.getEndLocation()),
439 Tok.getHTMLQuotedString()));
440 consumeToken();
441 continue;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000442 }
443
444 case tok::html_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000445 HST = S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000446 S.copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000447 Tok.getLocation(),
448 /* IsSelfClosing = */ false);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000449 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000450 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000451
452 case tok::html_slash_greater:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000453 HST = S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000454 S.copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000455 Tok.getLocation(),
456 /* IsSelfClosing = */ true);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000457 consumeToken();
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000458 return HST;
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000459
460 case tok::html_equals:
461 case tok::html_quoted_string:
462 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000463 diag::warn_doc_html_start_tag_expected_ident_or_greater);
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000464 while (Tok.is(tok::html_equals) ||
465 Tok.is(tok::html_quoted_string))
466 consumeToken();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000467 if (Tok.is(tok::html_ident) ||
468 Tok.is(tok::html_greater) ||
469 Tok.is(tok::html_slash_greater))
470 continue;
471
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000472 return S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000473 S.copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000474 SourceLocation(),
475 /* IsSelfClosing = */ false);
476
477 default:
478 // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
479 HST = S.actOnHTMLStartTagFinish(HST,
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000480 S.copyArray(llvm::makeArrayRef(Attrs)),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000481 SourceLocation(),
482 /* IsSelfClosing = */ false);
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000483 bool StartLineInvalid;
484 const unsigned StartLine = SourceMgr.getPresumedLineNumber(
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000485 HST->getLocation(),
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000486 &StartLineInvalid);
487 bool EndLineInvalid;
488 const unsigned EndLine = SourceMgr.getPresumedLineNumber(
489 Tok.getLocation(),
490 &EndLineInvalid);
491 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
492 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000493 diag::warn_doc_html_start_tag_expected_ident_or_greater)
494 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000495 else {
496 Diag(Tok.getLocation(),
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000497 diag::warn_doc_html_start_tag_expected_ident_or_greater);
498 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
499 << HST->getSourceRange();
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000500 }
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000501 return HST;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000502 }
503 }
504}
505
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000506HTMLEndTagComment *Parser::parseHTMLEndTag() {
507 assert(Tok.is(tok::html_end_tag));
508 Token TokEndTag = Tok;
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000509 consumeToken();
510 SourceLocation Loc;
511 if (Tok.is(tok::html_greater)) {
512 Loc = Tok.getLocation();
513 consumeToken();
514 }
515
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000516 return S.actOnHTMLEndTag(TokEndTag.getLocation(),
517 Loc,
518 TokEndTag.getHTMLTagEndName());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000519}
520
521BlockContentComment *Parser::parseParagraphOrBlockCommand() {
522 SmallVector<InlineContentComment *, 8> Content;
523
524 while (true) {
525 switch (Tok.getKind()) {
526 case tok::verbatim_block_begin:
527 case tok::verbatim_line_name:
528 case tok::eof:
529 assert(Content.size() != 0);
530 break; // Block content or EOF ahead, finish this parapgaph.
531
532 case tok::command:
533 if (S.isBlockCommand(Tok.getCommandName())) {
534 if (Content.size() == 0)
535 return parseBlockCommand();
536 break; // Block command ahead, finish this parapgaph.
537 }
538 if (S.isInlineCommand(Tok.getCommandName())) {
539 Content.push_back(parseInlineCommand());
540 continue;
541 }
542
543 // Not a block command, not an inline command ==> an unknown command.
544 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
545 Tok.getEndLocation(),
546 Tok.getCommandName()));
547 consumeToken();
548 continue;
549
550 case tok::newline: {
551 consumeToken();
552 if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
553 consumeToken();
554 break; // Two newlines -- end of paragraph.
555 }
556 if (Content.size() > 0)
557 Content.back()->addTrailingNewline();
558 continue;
559 }
560
561 // Don't deal with HTML tag soup now.
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000562 case tok::html_start_tag:
563 Content.push_back(parseHTMLStartTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000564 continue;
565
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000566 case tok::html_end_tag:
567 Content.push_back(parseHTMLEndTag());
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000568 continue;
569
570 case tok::text:
571 Content.push_back(S.actOnText(Tok.getLocation(),
572 Tok.getEndLocation(),
573 Tok.getText()));
574 consumeToken();
575 continue;
576
577 case tok::verbatim_block_line:
578 case tok::verbatim_block_end:
579 case tok::verbatim_line_text:
580 case tok::html_ident:
581 case tok::html_equals:
582 case tok::html_quoted_string:
583 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000584 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000585 llvm_unreachable("should not see this token");
586 }
587 break;
588 }
589
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000590 return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000591}
592
593VerbatimBlockComment *Parser::parseVerbatimBlock() {
594 assert(Tok.is(tok::verbatim_block_begin));
595
596 VerbatimBlockComment *VB =
597 S.actOnVerbatimBlockStart(Tok.getLocation(),
598 Tok.getVerbatimBlockName());
599 consumeToken();
600
601 // Don't create an empty line if verbatim opening command is followed
602 // by a newline.
603 if (Tok.is(tok::newline))
604 consumeToken();
605
606 SmallVector<VerbatimBlockLineComment *, 8> Lines;
607 while (Tok.is(tok::verbatim_block_line) ||
608 Tok.is(tok::newline)) {
609 VerbatimBlockLineComment *Line;
610 if (Tok.is(tok::verbatim_block_line)) {
611 Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
612 Tok.getVerbatimBlockText());
613 consumeToken();
614 if (Tok.is(tok::newline)) {
615 consumeToken();
616 }
617 } else {
618 // Empty line, just a tok::newline.
Dmitri Gribenko94572c32012-07-18 21:27:38 +0000619 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000620 consumeToken();
621 }
622 Lines.push_back(Line);
623 }
624
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000625 if (Tok.is(tok::verbatim_block_end)) {
626 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
627 Tok.getVerbatimBlockName(),
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000628 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000629 consumeToken();
630 } else {
631 // Unterminated \\verbatim block
632 VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000633 S.copyArray(llvm::makeArrayRef(Lines)));
Dmitri Gribenko9f08f492012-07-20 20:18:53 +0000634 }
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000635
636 return VB;
637}
638
639VerbatimLineComment *Parser::parseVerbatimLine() {
640 assert(Tok.is(tok::verbatim_line_name));
641
642 Token NameTok = Tok;
643 consumeToken();
644
645 SourceLocation TextBegin;
646 StringRef Text;
647 // Next token might not be a tok::verbatim_line_text if verbatim line
648 // starting command comes just before a newline or comment end.
649 if (Tok.is(tok::verbatim_line_text)) {
650 TextBegin = Tok.getLocation();
651 Text = Tok.getVerbatimLineText();
652 } else {
653 TextBegin = NameTok.getEndLocation();
654 Text = "";
655 }
656
657 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
658 NameTok.getVerbatimLineName(),
659 TextBegin,
660 Text);
661 consumeToken();
662 return VL;
663}
664
665BlockContentComment *Parser::parseBlockContent() {
666 switch (Tok.getKind()) {
667 case tok::text:
668 case tok::command:
Dmitri Gribenko3f38bf22012-07-13 00:44:24 +0000669 case tok::html_start_tag:
670 case tok::html_end_tag:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000671 return parseParagraphOrBlockCommand();
672
673 case tok::verbatim_block_begin:
674 return parseVerbatimBlock();
675
676 case tok::verbatim_line_name:
677 return parseVerbatimLine();
678
679 case tok::eof:
680 case tok::newline:
681 case tok::verbatim_block_line:
682 case tok::verbatim_block_end:
683 case tok::verbatim_line_text:
684 case tok::html_ident:
685 case tok::html_equals:
686 case tok::html_quoted_string:
687 case tok::html_greater:
Dmitri Gribenkoa5ef44f2012-07-11 21:38:39 +0000688 case tok::html_slash_greater:
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000689 llvm_unreachable("should not see this token");
690 }
Matt Beaumont-Gay4d48b5c2012-07-06 21:13:09 +0000691 llvm_unreachable("bogus token kind");
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000692}
693
694FullComment *Parser::parseFullComment() {
695 // Skip newlines at the beginning of the comment.
696 while (Tok.is(tok::newline))
697 consumeToken();
698
699 SmallVector<BlockContentComment *, 8> Blocks;
700 while (Tok.isNot(tok::eof)) {
701 Blocks.push_back(parseBlockContent());
702
703 // Skip extra newlines after paragraph end.
704 while (Tok.is(tok::newline))
705 consumeToken();
706 }
Dmitri Gribenko96b09862012-07-31 22:37:06 +0000707 return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
Dmitri Gribenko8d3ba232012-07-06 00:28:32 +0000708}
709
710} // end namespace comments
711} // end namespace clang