blob: fb85609a249dd4ef86e8936c915ffa86d672ed2d [file] [log] [blame]
Raphael Isemann80814282020-01-24 08:23:27 +01001//===-- CPlusPlusNameParser.cpp -------------------------------------------===//
Eugene Zemtsova633ee62017-04-06 22:36:02 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Eugene Zemtsova633ee62017-04-06 22:36:02 +00006//
7//===----------------------------------------------------------------------===//
8
9#include "CPlusPlusNameParser.h"
10
11#include "clang/Basic/IdentifierTable.h"
12#include "llvm/ADT/StringMap.h"
13#include "llvm/Support/Threading.h"
14
15using namespace lldb;
16using namespace lldb_private;
17using llvm::Optional;
18using llvm::None;
19using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21namespace tok = clang::tok;
22
23Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24 m_next_token_index = 0;
25 Optional<ParsedFunction> result(None);
26
Adrian Prantl05097242018-04-30 16:49:04 +000027 // Try to parse the name as function without a return type specified e.g.
28 // main(int, char*[])
Eugene Zemtsova633ee62017-04-06 22:36:02 +000029 {
30 Bookmark start_position = SetBookmark();
31 result = ParseFunctionImpl(false);
32 if (result && !HasMoreTokens())
33 return result;
34 }
35
Adrian Prantl05097242018-04-30 16:49:04 +000036 // Try to parse the name as function with function pointer return type e.g.
37 // void (*get_func(const char*))()
Eugene Zemtsova633ee62017-04-06 22:36:02 +000038 result = ParseFuncPtr(true);
39 if (result)
40 return result;
41
42 // Finally try to parse the name as a function with non-function return type
43 // e.g. int main(int, char*[])
44 result = ParseFunctionImpl(true);
45 if (HasMoreTokens())
46 return None;
47 return result;
48}
49
50Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51 m_next_token_index = 0;
52 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53 if (!name_ranges)
54 return None;
55 if (HasMoreTokens())
56 return None;
57 ParsedName result;
58 result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59 result.context = GetTextForRange(name_ranges.getValue().context_range);
60 return result;
61}
62
63bool CPlusPlusNameParser::HasMoreTokens() {
64 return m_next_token_index < m_tokens.size();
65}
66
67void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68
69void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70
71bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72 if (!HasMoreTokens())
73 return false;
74
75 if (!Peek().is(kind))
76 return false;
77
78 Advance();
79 return true;
80}
81
82template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83 if (!HasMoreTokens())
84 return false;
85
86 if (!Peek().isOneOf(kinds...))
87 return false;
88
89 Advance();
90 return true;
91}
92
93CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94 return Bookmark(m_next_token_index);
95}
96
97size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98
99clang::Token &CPlusPlusNameParser::Peek() {
100 assert(HasMoreTokens());
101 return m_tokens[m_next_token_index];
102}
103
104Optional<ParsedFunction>
105CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106 Bookmark start_position = SetBookmark();
107 if (expect_return_type) {
108 // Consume return type if it's expected.
109 if (!ConsumeTypename())
110 return None;
111 }
112
113 auto maybe_name = ParseFullNameImpl();
114 if (!maybe_name) {
115 return None;
116 }
117
118 size_t argument_start = GetCurrentPosition();
119 if (!ConsumeArguments()) {
120 return None;
121 }
122
123 size_t qualifiers_start = GetCurrentPosition();
124 SkipFunctionQualifiers();
125 size_t end_position = GetCurrentPosition();
126
127 ParsedFunction result;
128 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129 result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132 start_position.Remove();
133 return result;
134}
135
136Optional<ParsedFunction>
137CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138 Bookmark start_position = SetBookmark();
139 if (expect_return_type) {
140 // Consume return type.
141 if (!ConsumeTypename())
142 return None;
143 }
144
145 if (!ConsumeToken(tok::l_paren))
146 return None;
147 if (!ConsumePtrsAndRefs())
148 return None;
149
150 {
151 Bookmark before_inner_function_pos = SetBookmark();
152 auto maybe_inner_function_name = ParseFunctionImpl(false);
153 if (maybe_inner_function_name)
154 if (ConsumeToken(tok::r_paren))
155 if (ConsumeArguments()) {
156 SkipFunctionQualifiers();
157 start_position.Remove();
158 before_inner_function_pos.Remove();
159 return maybe_inner_function_name;
160 }
161 }
162
163 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164 if (maybe_inner_function_ptr_name)
165 if (ConsumeToken(tok::r_paren))
166 if (ConsumeArguments()) {
167 SkipFunctionQualifiers();
168 start_position.Remove();
169 return maybe_inner_function_ptr_name;
170 }
171 return None;
172}
173
174bool CPlusPlusNameParser::ConsumeArguments() {
175 return ConsumeBrackets(tok::l_paren, tok::r_paren);
176}
177
178bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179 Bookmark start_position = SetBookmark();
180 if (!HasMoreTokens() || Peek().getKind() != tok::less)
181 return false;
182 Advance();
183
184 // Consuming template arguments is a bit trickier than consuming function
Adrian Prantl05097242018-04-30 16:49:04 +0000185 // arguments, because '<' '>' brackets are not always trivially balanced. In
186 // some rare cases tokens '<' and '>' can appear inside template arguments as
187 // arithmetic or shift operators not as template brackets. Examples:
188 // std::enable_if<(10u)<(64), bool>
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000189 // f<A<operator<(X,Y)::Subclass>>
Adrian Prantl05097242018-04-30 16:49:04 +0000190 // Good thing that compiler makes sure that really ambiguous cases of '>'
191 // usage should be enclosed within '()' brackets.
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000192 int template_counter = 1;
193 bool can_open_template = false;
194 while (HasMoreTokens() && template_counter > 0) {
195 tok::TokenKind kind = Peek().getKind();
196 switch (kind) {
197 case tok::greatergreater:
198 template_counter -= 2;
199 can_open_template = false;
200 Advance();
201 break;
202 case tok::greater:
203 --template_counter;
204 can_open_template = false;
205 Advance();
206 break;
207 case tok::less:
208 // '<' is an attempt to open a subteamplte
209 // check if parser is at the point where it's actually possible,
Adrian Prantl05097242018-04-30 16:49:04 +0000210 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211 // need to do the same for '>' because compiler actually makes sure that
212 // '>' always surrounded by brackets to avoid ambiguity.
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000213 if (can_open_template)
214 ++template_counter;
215 can_open_template = false;
216 Advance();
217 break;
218 case tok::kw_operator: // C++ operator overloading.
219 if (!ConsumeOperator())
220 return false;
221 can_open_template = true;
222 break;
223 case tok::raw_identifier:
224 can_open_template = true;
225 Advance();
226 break;
227 case tok::l_square:
228 if (!ConsumeBrackets(tok::l_square, tok::r_square))
229 return false;
230 can_open_template = false;
231 break;
232 case tok::l_paren:
233 if (!ConsumeArguments())
234 return false;
235 can_open_template = false;
236 break;
237 default:
238 can_open_template = false;
239 Advance();
240 break;
241 }
242 }
243
Eugene Zemtsov9e916e52018-02-06 19:04:12 +0000244 if (template_counter != 0) {
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000245 return false;
246 }
247 start_position.Remove();
248 return true;
249}
250
251bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252 Bookmark start_position = SetBookmark();
253 if (!ConsumeToken(tok::l_paren)) {
254 return false;
255 }
256 constexpr llvm::StringLiteral g_anonymous("anonymous");
257 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258 Peek().getRawIdentifier() == g_anonymous) {
259 Advance();
260 } else {
261 return false;
262 }
263
264 if (!ConsumeToken(tok::kw_namespace)) {
265 return false;
266 }
267
268 if (!ConsumeToken(tok::r_paren)) {
269 return false;
270 }
271 start_position.Remove();
272 return true;
273}
274
Jim Ingham055e65f2017-07-13 19:48:43 +0000275bool CPlusPlusNameParser::ConsumeLambda() {
276 Bookmark start_position = SetBookmark();
277 if (!ConsumeToken(tok::l_brace)) {
278 return false;
279 }
280 constexpr llvm::StringLiteral g_lambda("lambda");
281 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282 Peek().getRawIdentifier() == g_lambda) {
283 // Put the matched brace back so we can use ConsumeBrackets
284 TakeBack();
285 } else {
286 return false;
287 }
288
289 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290 return false;
291 }
292
293 start_position.Remove();
294 return true;
295}
296
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000297bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298 tok::TokenKind right) {
299 Bookmark start_position = SetBookmark();
300 if (!HasMoreTokens() || Peek().getKind() != left)
301 return false;
302 Advance();
303
304 int counter = 1;
305 while (HasMoreTokens() && counter > 0) {
306 tok::TokenKind kind = Peek().getKind();
307 if (kind == right)
308 --counter;
309 else if (kind == left)
310 ++counter;
311 Advance();
312 }
313
314 assert(counter >= 0);
315 if (counter > 0) {
316 return false;
317 }
318 start_position.Remove();
319 return true;
320}
321
322bool CPlusPlusNameParser::ConsumeOperator() {
323 Bookmark start_position = SetBookmark();
324 if (!ConsumeToken(tok::kw_operator))
325 return false;
326
327 if (!HasMoreTokens()) {
328 return false;
329 }
330
331 const auto &token = Peek();
332 switch (token.getKind()) {
333 case tok::kw_new:
334 case tok::kw_delete:
335 // This is 'new' or 'delete' operators.
336 Advance();
337 // Check for array new/delete.
338 if (HasMoreTokens() && Peek().is(tok::l_square)) {
339 // Consume the '[' and ']'.
340 if (!ConsumeBrackets(tok::l_square, tok::r_square))
341 return false;
342 }
343 break;
344
345#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
346 case tok::Token: \
347 Advance(); \
348 break;
349#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
350#include "clang/Basic/OperatorKinds.def"
351#undef OVERLOADED_OPERATOR
352#undef OVERLOADED_OPERATOR_MULTI
353
354 case tok::l_paren:
355 // Call operator consume '(' ... ')'.
356 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
357 break;
358 return false;
359
360 case tok::l_square:
361 // This is a [] operator.
362 // Consume the '[' and ']'.
363 if (ConsumeBrackets(tok::l_square, tok::r_square))
364 break;
365 return false;
366
367 default:
368 // This might be a cast operator.
369 if (ConsumeTypename())
370 break;
371 return false;
372 }
373 start_position.Remove();
374 return true;
375}
376
377void CPlusPlusNameParser::SkipTypeQualifiers() {
378 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
379 ;
380}
381
382void CPlusPlusNameParser::SkipFunctionQualifiers() {
383 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
384 ;
385}
386
387bool CPlusPlusNameParser::ConsumeBuiltinType() {
388 bool result = false;
389 bool continue_parsing = true;
Adrian Prantl05097242018-04-30 16:49:04 +0000390 // Built-in types can be made of a few keywords like 'unsigned long long
391 // int'. This function consumes all built-in type keywords without checking
392 // if they make sense like 'unsigned char void'.
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000393 while (continue_parsing && HasMoreTokens()) {
394 switch (Peek().getKind()) {
395 case tok::kw_short:
396 case tok::kw_long:
397 case tok::kw___int64:
398 case tok::kw___int128:
399 case tok::kw_signed:
400 case tok::kw_unsigned:
401 case tok::kw_void:
402 case tok::kw_char:
403 case tok::kw_int:
404 case tok::kw_half:
405 case tok::kw_float:
406 case tok::kw_double:
407 case tok::kw___float128:
408 case tok::kw_wchar_t:
409 case tok::kw_bool:
410 case tok::kw_char16_t:
411 case tok::kw_char32_t:
412 result = true;
413 Advance();
414 break;
415 default:
416 continue_parsing = false;
417 break;
418 }
419 }
420 return result;
421}
422
423void CPlusPlusNameParser::SkipPtrsAndRefs() {
424 // Ignoring result.
425 ConsumePtrsAndRefs();
426}
427
428bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
429 bool found = false;
430 SkipTypeQualifiers();
431 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
432 tok::kw_volatile)) {
433 found = true;
434 SkipTypeQualifiers();
435 }
436 return found;
437}
438
439bool CPlusPlusNameParser::ConsumeDecltype() {
440 Bookmark start_position = SetBookmark();
441 if (!ConsumeToken(tok::kw_decltype))
442 return false;
443
444 if (!ConsumeArguments())
445 return false;
446
447 start_position.Remove();
448 return true;
449}
450
451bool CPlusPlusNameParser::ConsumeTypename() {
452 Bookmark start_position = SetBookmark();
453 SkipTypeQualifiers();
454 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
455 if (!ParseFullNameImpl())
456 return false;
457 }
458 SkipPtrsAndRefs();
459 start_position.Remove();
460 return true;
461}
462
463Optional<CPlusPlusNameParser::ParsedNameRanges>
464CPlusPlusNameParser::ParseFullNameImpl() {
465 // Name parsing state machine.
466 enum class State {
467 Beginning, // start of the name
468 AfterTwoColons, // right after ::
469 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
470 AfterTemplate, // right after template brackets (<something>)
471 AfterOperator, // right after name of C++ operator
472 };
473
474 Bookmark start_position = SetBookmark();
475 State state = State::Beginning;
476 bool continue_parsing = true;
477 Optional<size_t> last_coloncolon_position = None;
478
479 while (continue_parsing && HasMoreTokens()) {
480 const auto &token = Peek();
481 switch (token.getKind()) {
482 case tok::raw_identifier: // Just a name.
483 if (state != State::Beginning && state != State::AfterTwoColons) {
484 continue_parsing = false;
485 break;
486 }
487 Advance();
488 state = State::AfterIdentifier;
489 break;
490 case tok::l_paren: {
491 if (state == State::Beginning || state == State::AfterTwoColons) {
492 // (anonymous namespace)
493 if (ConsumeAnonymousNamespace()) {
494 state = State::AfterIdentifier;
495 break;
496 }
497 }
498
499 // Type declared inside a function 'func()::Type'
500 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
501 state != State::AfterOperator) {
502 continue_parsing = false;
503 break;
504 }
505 Bookmark l_paren_position = SetBookmark();
506 // Consume the '(' ... ') [const]'.
507 if (!ConsumeArguments()) {
508 continue_parsing = false;
509 break;
510 }
511 SkipFunctionQualifiers();
512
513 // Consume '::'
514 size_t coloncolon_position = GetCurrentPosition();
515 if (!ConsumeToken(tok::coloncolon)) {
516 continue_parsing = false;
517 break;
518 }
519 l_paren_position.Remove();
520 last_coloncolon_position = coloncolon_position;
521 state = State::AfterTwoColons;
522 break;
523 }
Jim Ingham055e65f2017-07-13 19:48:43 +0000524 case tok::l_brace:
525 if (state == State::Beginning || state == State::AfterTwoColons) {
526 if (ConsumeLambda()) {
527 state = State::AfterIdentifier;
528 break;
529 }
530 }
531 continue_parsing = false;
532 break;
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000533 case tok::coloncolon: // Type nesting delimiter.
534 if (state != State::Beginning && state != State::AfterIdentifier &&
535 state != State::AfterTemplate) {
536 continue_parsing = false;
537 break;
538 }
539 last_coloncolon_position = GetCurrentPosition();
540 Advance();
541 state = State::AfterTwoColons;
542 break;
543 case tok::less: // Template brackets.
544 if (state != State::AfterIdentifier && state != State::AfterOperator) {
545 continue_parsing = false;
546 break;
547 }
548 if (!ConsumeTemplateArgs()) {
549 continue_parsing = false;
550 break;
551 }
552 state = State::AfterTemplate;
553 break;
554 case tok::kw_operator: // C++ operator overloading.
555 if (state != State::Beginning && state != State::AfterTwoColons) {
556 continue_parsing = false;
557 break;
558 }
559 if (!ConsumeOperator()) {
560 continue_parsing = false;
561 break;
562 }
563 state = State::AfterOperator;
564 break;
565 case tok::tilde: // Destructor.
566 if (state != State::Beginning && state != State::AfterTwoColons) {
567 continue_parsing = false;
568 break;
569 }
570 Advance();
571 if (ConsumeToken(tok::raw_identifier)) {
572 state = State::AfterIdentifier;
573 } else {
574 TakeBack();
575 continue_parsing = false;
576 }
577 break;
578 default:
579 continue_parsing = false;
580 break;
581 }
582 }
583
584 if (state == State::AfterIdentifier || state == State::AfterOperator ||
585 state == State::AfterTemplate) {
586 ParsedNameRanges result;
587 if (last_coloncolon_position) {
588 result.context_range = Range(start_position.GetSavedPosition(),
589 last_coloncolon_position.getValue());
590 result.basename_range =
591 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
592 } else {
593 result.basename_range =
594 Range(start_position.GetSavedPosition(), GetCurrentPosition());
595 }
596 start_position.Remove();
597 return result;
598 } else {
599 return None;
600 }
601}
602
603llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
604 if (range.empty())
605 return llvm::StringRef();
606 assert(range.begin_index < range.end_index);
607 assert(range.begin_index < m_tokens.size());
608 assert(range.end_index <= m_tokens.size());
609 clang::Token &first_token = m_tokens[range.begin_index];
610 clang::Token &last_token = m_tokens[range.end_index - 1];
611 clang::SourceLocation start_loc = first_token.getLocation();
612 clang::SourceLocation end_loc = last_token.getLocation();
613 unsigned start_pos = start_loc.getRawEncoding();
614 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
615 return m_text.take_front(end_pos).drop_front(start_pos);
616}
617
618static const clang::LangOptions &GetLangOptions() {
619 static clang::LangOptions g_options;
620 static llvm::once_flag g_once_flag;
621 llvm::call_once(g_once_flag, []() {
622 g_options.LineComment = true;
623 g_options.C99 = true;
624 g_options.C11 = true;
625 g_options.CPlusPlus = true;
626 g_options.CPlusPlus11 = true;
627 g_options.CPlusPlus14 = true;
Aaron Ballman05b618e2017-12-04 20:46:43 +0000628 g_options.CPlusPlus17 = true;
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000629 });
630 return g_options;
631}
632
633static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
634 static llvm::StringMap<tok::TokenKind> g_map{
635#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
636#include "clang/Basic/TokenKinds.def"
637#undef KEYWORD
638 };
639 return g_map;
640}
641
642void CPlusPlusNameParser::ExtractTokens() {
Adrian Prantl4a585a32019-05-31 00:18:42 +0000643 if (m_text.empty())
644 return;
Eugene Zemtsova633ee62017-04-06 22:36:02 +0000645 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
646 m_text.data(), m_text.data() + m_text.size());
647 const auto &kw_map = GetKeywordsMap();
648 clang::Token token;
649 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
650 lexer.LexFromRawLexer(token)) {
651 if (token.is(clang::tok::raw_identifier)) {
652 auto it = kw_map.find(token.getRawIdentifier());
653 if (it != kw_map.end()) {
654 token.setKind(it->getValue());
655 }
656 }
657
658 m_tokens.push_back(token);
659 }
660}