blob: 8317f76b8569f8bb4ccde19a137437e03278fabb [file] [log] [blame]
//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
Chandler Carruth39a3e752012-06-20 09:53:52 +000010#include "clang/AST/RawCommentList.h"
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +000011#include "clang/AST/ASTContext.h"
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +000012#include "clang/AST/Comment.h"
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +000013#include "clang/AST/CommentBriefParser.h"
Dmitri Gribenkoca7f80a2012-08-09 00:03:17 +000014#include "clang/AST/CommentCommandTraits.h"
Chandler Carruth3a022472012-12-04 09:13:33 +000015#include "clang/AST/CommentLexer.h"
16#include "clang/AST/CommentParser.h"
17#include "clang/AST/CommentSema.h"
James Dennett2def1e82015-07-15 19:13:39 +000018#include "clang/Basic/CharInfo.h"
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000019#include "llvm/ADT/STLExtras.h"
20
21using namespace clang;
22
23namespace {
24/// Get comment kind and bool describing if it is a trailing comment.
Dmitri Gribenkoe9585622013-04-26 20:12:49 +000025std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
26 bool ParseAllComments) {
27 const size_t MinCommentLength = ParseAllComments ? 2 : 3;
28 if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000029 return std::make_pair(RawComment::RCK_Invalid, false);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000030
31 RawComment::CommentKind K;
32 if (Comment[1] == '/') {
33 if (Comment.size() < 3)
Abramo Bagnarae06a8882012-07-04 07:30:26 +000034 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000035
36 if (Comment[2] == '/')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000037 K = RawComment::RCK_BCPLSlash;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000038 else if (Comment[2] == '!')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000039 K = RawComment::RCK_BCPLExcl;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000040 else
Abramo Bagnarae06a8882012-07-04 07:30:26 +000041 return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000042 } else {
43 assert(Comment.size() >= 4);
44
45 // Comment lexer does not understand escapes in comment markers, so pretend
46 // that this is not a comment.
47 if (Comment[1] != '*' ||
48 Comment[Comment.size() - 2] != '*' ||
49 Comment[Comment.size() - 1] != '/')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000050 return std::make_pair(RawComment::RCK_Invalid, false);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000051
52 if (Comment[2] == '*')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000053 K = RawComment::RCK_JavaDoc;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000054 else if (Comment[2] == '!')
Abramo Bagnarae06a8882012-07-04 07:30:26 +000055 K = RawComment::RCK_Qt;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000056 else
Abramo Bagnarae06a8882012-07-04 07:30:26 +000057 return std::make_pair(RawComment::RCK_OrdinaryC, false);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000058 }
59 const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
60 return std::make_pair(K, TrailingComment);
61}
62
63bool mergedCommentIsTrailingComment(StringRef Comment) {
64 return (Comment.size() > 3) && (Comment[3] == '<');
65}
James Dennett2def1e82015-07-15 19:13:39 +000066
67/// Returns true if R1 and R2 both have valid locations that start on the same
68/// column.
69bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
70 const RawComment &R2) {
71 SourceLocation L1 = R1.getLocStart();
72 SourceLocation L2 = R2.getLocStart();
73 bool Invalid = false;
74 unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
75 if (!Invalid) {
76 unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
77 return !Invalid && (C1 == C2);
78 }
79 return false;
80}
Dmitri Gribenkoaab83832012-06-20 00:34:58 +000081} // unnamed namespace
82
James Dennett2def1e82015-07-15 19:13:39 +000083/// \brief Determines whether there is only whitespace in `Buffer` between `P`
84/// and the previous line.
85/// \param Buffer The buffer to search in.
86/// \param P The offset from the beginning of `Buffer` to start from.
87/// \return true if all of the characters in `Buffer` ranging from the closest
88/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
89/// are whitespace.
90static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
91 // Search backwards until we see linefeed or carriage return.
92 for (unsigned I = P; I != 0; --I) {
93 char C = Buffer[I - 1];
94 if (isVerticalWhitespace(C))
95 return true;
96 if (!isHorizontalWhitespace(C))
97 return false;
98 }
99 // We hit the beginning of the buffer.
100 return true;
101}
102
103/// Returns whether `K` is an ordinary comment kind.
104static bool isOrdinaryKind(RawComment::CommentKind K) {
105 return (K == RawComment::RCK_OrdinaryBCPL) ||
106 (K == RawComment::RCK_OrdinaryC);
107}
108
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000109RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
Dmitri Gribenkoa7d16ce2013-04-10 15:35:17 +0000110 bool Merged, bool ParseAllComments) :
Dmitri Gribenko60c7ec62012-06-27 05:48:36 +0000111 Range(SR), RawTextValid(false), BriefTextValid(false),
James Dennett2def1e82015-07-15 19:13:39 +0000112 IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000113 ParseAllComments(ParseAllComments) {
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000114 // Extract raw comment text, if possible.
Dmitri Gribenkofecc2e02012-06-21 21:02:45 +0000115 if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
Abramo Bagnarae06a8882012-07-04 07:30:26 +0000116 Kind = RCK_Invalid;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000117 return;
118 }
119
James Dennett2def1e82015-07-15 19:13:39 +0000120 // Guess comment kind.
121 std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
122
123 // Guess whether an ordinary comment is trailing.
124 if (ParseAllComments && isOrdinaryKind(K.first)) {
125 FileID BeginFileID;
126 unsigned BeginOffset;
127 std::tie(BeginFileID, BeginOffset) =
128 SourceMgr.getDecomposedLoc(Range.getBegin());
129 if (BeginOffset != 0) {
130 bool Invalid = false;
131 const char *Buffer =
132 SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133 IsTrailingComment |=
134 (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135 }
136 }
137
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000138 if (!Merged) {
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000139 Kind = K.first;
James Dennett2def1e82015-07-15 19:13:39 +0000140 IsTrailingComment |= K.second;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000141
142 IsAlmostTrailingComment = RawText.startswith("//<") ||
143 RawText.startswith("/*<");
144 } else {
Abramo Bagnarae06a8882012-07-04 07:30:26 +0000145 Kind = RCK_Merged;
James Dennett2def1e82015-07-15 19:13:39 +0000146 IsTrailingComment =
147 IsTrailingComment || mergedCommentIsTrailingComment(RawText);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000148 }
149}
150
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000151StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152 FileID BeginFileID;
153 FileID EndFileID;
154 unsigned BeginOffset;
155 unsigned EndOffset;
156
Benjamin Kramer867ea1d2014-03-02 13:01:17 +0000157 std::tie(BeginFileID, BeginOffset) =
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000158 SourceMgr.getDecomposedLoc(Range.getBegin());
Benjamin Kramer867ea1d2014-03-02 13:01:17 +0000159 std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000160
161 const unsigned Length = EndOffset - BeginOffset;
162 if (Length < 2)
163 return StringRef();
164
165 // The comment can't begin in one file and end in another.
166 assert(BeginFileID == EndFileID);
167
168 bool Invalid = false;
169 const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170 &Invalid).data();
171 if (Invalid)
172 return StringRef();
173
174 return StringRef(BufferStart + BeginOffset, Length);
175}
176
Dmitri Gribenko3292d062012-07-02 17:35:10 +0000177const char *RawComment::extractBriefText(const ASTContext &Context) const {
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +0000178 // Make sure that RawText is valid.
179 getRawText(Context.getSourceManager());
180
Dmitri Gribenko4586df72012-07-27 20:37:06 +0000181 // Since we will be copying the resulting text, all allocations made during
182 // parsing are garbage after resulting string is formed. Thus we can use
183 // a separate allocator for all temporary stuff.
184 llvm::BumpPtrAllocator Allocator;
185
Fariborz Jahanian5b637072013-05-03 23:15:20 +0000186 comments::Lexer L(Allocator, Context.getDiagnostics(),
187 Context.getCommentCommandTraits(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000188 Range.getBegin(),
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +0000189 RawText.begin(), RawText.end());
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000190 comments::BriefParser P(L, Context.getCommentCommandTraits());
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +0000191
192 const std::string Result = P.Parse();
193 const unsigned BriefTextLength = Result.size();
194 char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195 memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
Dmitri Gribenko3292d062012-07-02 17:35:10 +0000196 BriefText = BriefTextPtr;
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +0000197 BriefTextValid = true;
198
Dmitri Gribenko3292d062012-07-02 17:35:10 +0000199 return BriefTextPtr;
Dmitri Gribenko5188c4b2012-06-26 20:39:18 +0000200}
201
Dmitri Gribenkob2610882012-08-14 17:17:18 +0000202comments::FullComment *RawComment::parse(const ASTContext &Context,
Dmitri Gribenko6743e042012-09-29 11:40:46 +0000203 const Preprocessor *PP,
Dmitri Gribenkob2610882012-08-14 17:17:18 +0000204 const Decl *D) const {
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +0000205 // Make sure that RawText is valid.
206 getRawText(Context.getSourceManager());
207
Fariborz Jahanian5b637072013-05-03 23:15:20 +0000208 comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209 Context.getCommentCommandTraits(),
Dmitri Gribenko6bab9112012-08-31 10:35:30 +0000210 getSourceRange().getBegin(),
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +0000211 RawText.begin(), RawText.end());
212 comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000213 Context.getDiagnostics(),
Dmitri Gribenko6743e042012-09-29 11:40:46 +0000214 Context.getCommentCommandTraits(),
215 PP);
Dmitri Gribenkob2610882012-08-14 17:17:18 +0000216 S.setDecl(D);
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +0000217 comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
Dmitri Gribenko7acbf002012-09-10 20:32:42 +0000218 Context.getDiagnostics(),
219 Context.getCommentCommandTraits());
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +0000220
Dmitri Gribenkob2610882012-08-14 17:17:18 +0000221 return P.parseFullComment();
Dmitri Gribenkoa43ec182012-08-11 00:51:43 +0000222}
223
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000224static bool onlyWhitespaceBetween(SourceManager &SM,
225 SourceLocation Loc1, SourceLocation Loc2,
226 unsigned MaxNewlinesAllowed) {
Dmitri Gribenko1d0f5672012-09-09 20:47:31 +0000227 std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228 std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000229
Dmitri Gribenko1d0f5672012-09-09 20:47:31 +0000230 // Question does not make sense if locations are in different files.
231 if (Loc1Info.first != Loc2Info.first)
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000232 return false;
233
234 bool Invalid = false;
Dmitri Gribenko1d0f5672012-09-09 20:47:31 +0000235 const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000236 if (Invalid)
237 return false;
238
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000239 unsigned NumNewlines = 0;
240 assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241 // Look for non-whitespace characters and remember any newlines seen.
242 for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243 switch (Buffer[I]) {
244 default:
245 return false;
246 case ' ':
247 case '\t':
248 case '\f':
249 case '\v':
250 break;
251 case '\r':
252 case '\n':
253 ++NumNewlines;
254
255 // Check if we have found more than the maximum allowed number of
256 // newlines.
257 if (NumNewlines > MaxNewlinesAllowed)
258 return false;
259
260 // Collapse \r\n and \n\r into a single newline.
261 if (I + 1 != Loc2Info.second &&
262 (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
263 Buffer[I] != Buffer[I + 1])
264 ++I;
265 break;
266 }
267 }
268
269 return true;
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000270}
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000271
Dmitri Gribenko7dd29d42012-07-06 18:19:34 +0000272void RawCommentList::addComment(const RawComment &RC,
273 llvm::BumpPtrAllocator &Allocator) {
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000274 if (RC.isInvalid())
275 return;
276
Dmitri Gribenko92307402012-06-21 22:04:37 +0000277 // Check if the comments are not in source order.
278 while (!Comments.empty() &&
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000279 !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
280 RC.getLocStart())) {
Dmitri Gribenko92307402012-06-21 22:04:37 +0000281 // If they are, just pop a few last comments that don't fit.
282 // This happens if an \#include directive contains comments.
283 Comments.pop_back();
284 }
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000285
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000286 // Ordinary comments are not interesting for us.
287 if (RC.isOrdinary())
288 return;
289
290 // If this is the first Doxygen comment, save it (because there isn't
291 // anything to merge it with).
292 if (Comments.empty()) {
Dmitri Gribenko7dd29d42012-07-06 18:19:34 +0000293 Comments.push_back(new (Allocator) RawComment(RC));
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000294 return;
295 }
296
Dmitri Gribenko7dd29d42012-07-06 18:19:34 +0000297 const RawComment &C1 = *Comments.back();
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000298 const RawComment &C2 = RC;
299
300 // Merge comments only if there is only whitespace between them.
James Dennett2def1e82015-07-15 19:13:39 +0000301 // Can't merge trailing and non-trailing comments unless the second is
302 // non-trailing ordinary in the same column, as in the case:
303 // int x; // documents x
304 // // more text
305 // versus:
306 // int x; // documents x
307 // int y; // documents y
308 // or:
309 // int x; // documents x
310 // // documents y
311 // int y;
Dmitri Gribenko557a8d52012-08-28 01:20:53 +0000312 // Merge comments if they are on same or consecutive lines.
James Dennett2def1e82015-07-15 19:13:39 +0000313 if ((C1.isTrailingComment() == C2.isTrailingComment() ||
314 (C1.isTrailingComment() && !C2.isTrailingComment() &&
315 isOrdinaryKind(C2.getKind()) &&
316 commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000317 onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
318 /*MaxNewlinesAllowed=*/1)) {
319 SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
320 *Comments.back() = RawComment(SourceMgr, MergedRange, true,
321 RC.isParseAllComments());
322 } else {
Dmitri Gribenko7dd29d42012-07-06 18:19:34 +0000323 Comments.push_back(new (Allocator) RawComment(RC));
Benjamin Kramer20c28be2013-09-28 15:06:27 +0000324 }
Dmitri Gribenkoaab83832012-06-20 00:34:58 +0000325}
Dmitri Gribenko9ee0e302014-03-27 15:40:39 +0000326
327void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328 std::vector<RawComment *> MergedComments;
329 MergedComments.reserve(Comments.size() + DeserializedComments.size());
330
331 std::merge(Comments.begin(), Comments.end(),
332 DeserializedComments.begin(), DeserializedComments.end(),
333 std::back_inserter(MergedComments),
334 BeforeThanCompare<RawComment>(SourceMgr));
335 std::swap(Comments, MergedComments);
336}
337