PR41111, PR5925, PR13210: Teach tentative parsing to annotate identifiers and
nested names as id-expressions, using the annot_primary_expr annotation, where
possible. This removes some redundant lookups. It also allows us to typo-correct
within tentative parsing, and to carry on disambiguating past an identifier
that we can determine will fail lookup both as a type and as a non-type. That in
turn lets us disambiguate more declarations (and thus offer improved error
recovery for such cases).
This also introduces to the parser the notion of a tentatively-declared name:
an identifier for which we *might* have seen a declaration during a tentative
parse (the declaration only exists if we end up disambiguating the tokens as a
declaration). This is necessary to correctly disambiguate cases where a
variable is used within its own initializer, such as 'int n(n);'.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@162159 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index cb865cc..4d33106 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -3165,6 +3165,8 @@
// anything that's a simple-type-specifier followed by '(' as an
// expression. This suffices because function types are not valid
// underlying types anyway.
+ EnterExpressionEvaluationContext Unevaluated(Actions,
+ Sema::ConstantEvaluated);
TPResult TPR = isExpressionOrTypeSpecifierSimple(NextToken().getKind());
// If the next token starts an expression, we know we're parsing a
// bit-field. This is the common case.
@@ -4374,9 +4376,15 @@
// In such a case, check if we actually have a function declarator; if it
// is not, the declarator has been fully parsed.
bool IsAmbiguous = false;
- if (getLangOpts().CPlusPlus && D.mayBeFollowedByCXXDirectInit() &&
- !isCXXFunctionDeclarator(&IsAmbiguous))
- break;
+ if (getLangOpts().CPlusPlus && D.mayBeFollowedByCXXDirectInit()) {
+ // The name of the declarator, if any, is tentatively declared within
+ // a possible direct initializer.
+ TentativelyDeclaredIdentifiers.push_back(D.getIdentifier());
+ bool IsFunctionDecl = isCXXFunctionDeclarator(&IsAmbiguous);
+ TentativelyDeclaredIdentifiers.pop_back();
+ if (!IsFunctionDecl)
+ break;
+ }
ParsedAttributes attrs(AttrFactory);
BalancedDelimiterTracker T(*this, tok::l_paren);
T.consumeOpen();
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index afac257..f2ba4c6 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -988,6 +988,21 @@
ExprResult Result;
+ // C++0x [expr.typeid]p3:
+ // When typeid is applied to an expression other than an lvalue of a
+ // polymorphic class type [...] The expression is an unevaluated
+ // operand (Clause 5).
+ //
+ // Note that we can't tell whether the expression is an lvalue of a
+ // polymorphic class type until after we've parsed the expression; we
+ // speculatively assume the subexpression is unevaluated, and fix it up
+ // later.
+ //
+ // We enter the unevaluated context before trying to determine whether we
+ // have a type-id, because the tentative parse logic will try to resolve
+ // names, and must treat them as unevaluated.
+ EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated);
+
if (isTypeIdInParens()) {
TypeResult Ty = ParseTypeName();
@@ -1000,16 +1015,6 @@
Result = Actions.ActOnCXXTypeid(OpLoc, LParenLoc, /*isType=*/true,
Ty.get().getAsOpaquePtr(), RParenLoc);
} else {
- // C++0x [expr.typeid]p3:
- // When typeid is applied to an expression other than an lvalue of a
- // polymorphic class type [...] The expression is an unevaluated
- // operand (Clause 5).
- //
- // Note that we can't tell whether the expression is an lvalue of a
- // polymorphic class type until after we've parsed the expression; we
- // speculatively assume the subexpression is unevaluated, and fix it up
- // later.
- EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated);
Result = ParseExpression();
// Match the ')'.
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index df9b996..0716f6f 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -17,6 +17,7 @@
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/PrettyDeclStackTrace.h"
#include "clang/Sema/Scope.h"
+#include "clang/Sema/TypoCorrection.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
@@ -130,96 +131,38 @@
return ParseLabeledStatement(Attrs);
}
+ // Look up the identifier, and typo-correct it to a keyword if it's not
+ // found.
if (Next.isNot(tok::coloncolon)) {
- CXXScopeSpec SS;
- IdentifierInfo *Name = Tok.getIdentifierInfo();
- SourceLocation NameLoc = Tok.getLocation();
-
- if (getLangOpts().CPlusPlus)
- CheckForTemplateAndDigraph(Next, ParsedType(),
- /*EnteringContext=*/false, *Name, SS);
-
- Sema::NameClassification Classification
- = Actions.ClassifyName(getCurScope(), SS, Name, NameLoc, Next);
- switch (Classification.getKind()) {
- case Sema::NC_Keyword:
- // The identifier was corrected to a keyword. Update the token
- // to this keyword, and try again.
- if (Name->getTokenID() != tok::identifier) {
- Tok.setIdentifierInfo(Name);
- Tok.setKind(Name->getTokenID());
- goto Retry;
- }
-
- // Fall through via the normal error path.
- // FIXME: This seems like it could only happen for context-sensitive
- // keywords.
-
- case Sema::NC_Error:
+ // Try to limit which sets of keywords should be included in typo
+ // correction based on what the next token is.
+ // FIXME: Pass the next token into the CorrectionCandidateCallback and
+ // do this filtering in a more fine-grained manner.
+ CorrectionCandidateCallback DefaultValidator;
+ DefaultValidator.WantTypeSpecifiers =
+ Next.is(tok::l_paren) || Next.is(tok::less) ||
+ Next.is(tok::identifier) || Next.is(tok::star) ||
+ Next.is(tok::amp) || Next.is(tok::l_square);
+ DefaultValidator.WantExpressionKeywords =
+ Next.is(tok::l_paren) || Next.is(tok::identifier) ||
+ Next.is(tok::arrow) || Next.is(tok::period);
+ DefaultValidator.WantRemainingKeywords =
+ Next.is(tok::l_paren) || Next.is(tok::semi) ||
+ Next.is(tok::identifier) || Next.is(tok::l_brace);
+ DefaultValidator.WantCXXNamedCasts = false;
+ if (TryAnnotateName(/*IsAddressOfOperand*/false, &DefaultValidator)
+ == ANK_Error) {
// Handle errors here by skipping up to the next semicolon or '}', and
// eat the semicolon if that's what stopped us.
SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
if (Tok.is(tok::semi))
ConsumeToken();
return StmtError();
+ }
- case Sema::NC_Unknown:
- // Either we don't know anything about this identifier, or we know that
- // we're in a syntactic context we haven't handled yet.
- break;
-
- case Sema::NC_Type:
- Tok.setKind(tok::annot_typename);
- setTypeAnnotation(Tok, Classification.getType());
- Tok.setAnnotationEndLoc(NameLoc);
- PP.AnnotateCachedTokens(Tok);
- break;
-
- case Sema::NC_Expression:
- Tok.setKind(tok::annot_primary_expr);
- setExprAnnotation(Tok, Classification.getExpression());
- Tok.setAnnotationEndLoc(NameLoc);
- PP.AnnotateCachedTokens(Tok);
- break;
-
- case Sema::NC_TypeTemplate:
- case Sema::NC_FunctionTemplate: {
- ConsumeToken(); // the identifier
- UnqualifiedId Id;
- Id.setIdentifier(Name, NameLoc);
- if (AnnotateTemplateIdToken(
- TemplateTy::make(Classification.getTemplateName()),
- Classification.getTemplateNameKind(),
- SS, SourceLocation(), Id,
- /*AllowTypeAnnotation=*/false)) {
- // Handle errors here by skipping up to the next semicolon or '}', and
- // eat the semicolon if that's what stopped us.
- SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
- if (Tok.is(tok::semi))
- ConsumeToken();
- return StmtError();
- }
-
- // If the next token is '::', jump right into parsing a
- // nested-name-specifier. We don't want to leave the template-id
- // hanging.
- if (NextToken().is(tok::coloncolon) && TryAnnotateCXXScopeToken(false)){
- // Handle errors here by skipping up to the next semicolon or '}', and
- // eat the semicolon if that's what stopped us.
- SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
- if (Tok.is(tok::semi))
- ConsumeToken();
- return StmtError();
- }
-
- // We've annotated a template-id, so try again now.
+ // If the identifier was typo-corrected, try again.
+ if (Tok.isNot(tok::identifier))
goto Retry;
- }
-
- case Sema::NC_NestedNameSpecifier:
- // FIXME: Implement this!
- break;
- }
}
// Fall through
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index 1a4df47..653f6c2 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -623,6 +623,8 @@
// declarator-id
if (Tok.is(tok::annot_cxxscope))
ConsumeToken();
+ else
+ TentativelyDeclaredIdentifiers.push_back(Tok.getIdentifierInfo());
ConsumeToken();
} else if (Tok.is(tok::l_paren)) {
ConsumeParen();
@@ -824,6 +826,12 @@
return TPResult::Ambiguous();
}
+bool Parser::isTentativelyDeclared(IdentifierInfo *II) {
+ return std::find(TentativelyDeclaredIdentifiers.begin(),
+ TentativelyDeclaredIdentifiers.end(), II)
+ != TentativelyDeclaredIdentifiers.end();
+}
+
/// isCXXDeclarationSpecifier - Returns TPResult::True() if it is a declaration
/// specifier, TPResult::False() if it is not, TPResult::Ambiguous() if it could
/// be either a decl-specifier or a function-style cast, and TPResult::Error()
@@ -831,7 +839,10 @@
///
/// If HasMissingTypename is provided, a name with a dependent scope specifier
/// will be treated as ambiguous if the 'typename' keyword is missing. If this
-/// happens, *HasMissingTypename will be set to 'true'.
+/// happens, *HasMissingTypename will be set to 'true'. This will also be used
+/// as an indicator that undeclared identifiers (which will trigger a later
+/// parse error) should be treated as types. Returns TPResult::Ambiguous() in
+/// such cases.
///
/// decl-specifier:
/// storage-class-specifier
@@ -927,22 +938,64 @@
Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
bool *HasMissingTypename) {
switch (Tok.getKind()) {
- case tok::identifier: // foo::bar
+ case tok::identifier: {
// Check for need to substitute AltiVec __vector keyword
// for "vector" identifier.
if (TryAltiVecVectorToken())
return TPResult::True();
- // Fall through.
+
+ const Token &Next = NextToken();
+ // In 'foo bar', 'foo' is always a type name outside of Objective-C.
+ if (!getLangOpts().ObjC1 && Next.is(tok::identifier))
+ return TPResult::True();
+
+ if (Next.isNot(tok::coloncolon) && Next.isNot(tok::less)) {
+ // Determine whether this is a valid expression. If not, we will hit
+ // a parse error one way or another. In that case, tell the caller that
+ // this is ambiguous. Typo-correct to type and expression keywords and
+ // to types and identifiers, in order to try to recover from errors.
+ CorrectionCandidateCallback TypoCorrection;
+ TypoCorrection.WantRemainingKeywords = false;
+ switch (TryAnnotateName(false /* no nested name specifier */,
+ &TypoCorrection)) {
+ case ANK_Error:
+ return TPResult::Error();
+ case ANK_TentativeDecl:
+ return TPResult::False();
+ case ANK_TemplateName:
+ // A bare type template-name which can't be a template template
+ // argument is an error, and was probably intended to be a type.
+ return GreaterThanIsOperator ? TPResult::True() : TPResult::False();
+ case ANK_Unresolved:
+ return HasMissingTypename ? TPResult::Ambiguous() : TPResult::False();
+ case ANK_Success:
+ break;
+ }
+ assert(Tok.isNot(tok::identifier) &&
+ "TryAnnotateName succeeded without producing an annotation");
+ } else {
+ // This might possibly be a type with a dependent scope specifier and
+ // a missing 'typename' keyword. Don't use TryAnnotateName in this case,
+ // since it will annotate as a primary expression, and we want to use the
+ // "missing 'typename'" logic.
+ if (TryAnnotateTypeOrScopeToken())
+ return TPResult::Error();
+ // If annotation failed, assume it's a non-type.
+ // FIXME: If this happens due to an undeclared identifier, treat it as
+ // ambiguous.
+ if (Tok.is(tok::identifier))
+ return TPResult::False();
+ }
+
+ // We annotated this token as something. Recurse to handle whatever we got.
+ return isCXXDeclarationSpecifier(BracedCastResult, HasMissingTypename);
+ }
+
case tok::kw_typename: // typename T::type
// Annotate typenames and C++ scope specifiers. If we get one, just
// recurse to handle whatever we get.
if (TryAnnotateTypeOrScopeToken())
return TPResult::Error();
- if (Tok.is(tok::identifier)) {
- const Token &Next = NextToken();
- return (!getLangOpts().ObjC1 && Next.is(tok::identifier)) ?
- TPResult::True() : TPResult::False();
- }
return isCXXDeclarationSpecifier(BracedCastResult, HasMissingTypename);
case tok::coloncolon: { // ::foo::bar
@@ -1073,6 +1126,28 @@
*HasMissingTypename = true;
return TPResult::Ambiguous();
}
+ } else {
+ // Try to resolve the name. If it doesn't exist, assume it was
+ // intended to name a type and keep disambiguating.
+ switch (TryAnnotateName(false /* SS is not dependent */)) {
+ case ANK_Error:
+ return TPResult::Error();
+ case ANK_TentativeDecl:
+ return TPResult::False();
+ case ANK_TemplateName:
+ // A bare type template-name which can't be a template template
+ // argument is an error, and was probably intended to be a type.
+ return GreaterThanIsOperator ? TPResult::True() : TPResult::False();
+ case ANK_Unresolved:
+ return HasMissingTypename ? TPResult::Ambiguous()
+ : TPResult::False();
+ case ANK_Success:
+ // Annotated it, check again.
+ assert(Tok.isNot(tok::annot_cxxscope) ||
+ NextToken().isNot(tok::identifier));
+ return isCXXDeclarationSpecifier(BracedCastResult,
+ HasMissingTypename);
+ }
}
}
return TPResult::False();
diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp
index 3725e2b..bee802f 100644
--- a/lib/Parse/Parser.cpp
+++ b/lib/Parse/Parser.cpp
@@ -1301,6 +1301,143 @@
return Id;
}
+void Parser::AnnotateScopeToken(CXXScopeSpec &SS, bool IsNewAnnotation) {
+ // Push the current token back into the token stream (or revert it if it is
+ // cached) and use an annotation scope token for current token.
+ if (PP.isBacktrackEnabled())
+ PP.RevertCachedTokens(1);
+ else
+ PP.EnterToken(Tok);
+ Tok.setKind(tok::annot_cxxscope);
+ Tok.setAnnotationValue(Actions.SaveNestedNameSpecifierAnnotation(SS));
+ Tok.setAnnotationRange(SS.getRange());
+
+ // In case the tokens were cached, have Preprocessor replace them
+ // with the annotation token. We don't need to do this if we've
+ // just reverted back to a prior state.
+ if (IsNewAnnotation)
+ PP.AnnotateCachedTokens(Tok);
+}
+
+/// \brief Attempt to classify the name at the current token position. This may
+/// form a type, scope or primary expression annotation, or replace the token
+/// with a typo-corrected keyword. This is only appropriate when the current
+/// name must refer to an entity which has already been declared.
+///
+/// \param IsAddressOfOperand Must be \c true if the name is preceded by an '&'
+/// and might possibly have a dependent nested name specifier.
+/// \param CCC Indicates how to perform typo-correction for this name. If NULL,
+/// no typo correction will be performed.
+Parser::AnnotatedNameKind
+Parser::TryAnnotateName(bool IsAddressOfOperand,
+ CorrectionCandidateCallback *CCC) {
+ assert(Tok.is(tok::identifier) || Tok.is(tok::annot_cxxscope));
+
+ const bool EnteringContext = false;
+ const bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
+
+ CXXScopeSpec SS;
+ if (getLangOpts().CPlusPlus &&
+ ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext))
+ return ANK_Error;
+
+ if (Tok.isNot(tok::identifier) || SS.isInvalid()) {
+ if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(EnteringContext, false, SS,
+ !WasScopeAnnotation))
+ return ANK_Error;
+ return ANK_Unresolved;
+ }
+
+ IdentifierInfo *Name = Tok.getIdentifierInfo();
+ SourceLocation NameLoc = Tok.getLocation();
+
+ // FIXME: Move the tentative declaration logic into ClassifyName so we can
+ // typo-correct to tentatively-declared identifiers.
+ if (isTentativelyDeclared(Name)) {
+ // Identifier has been tentatively declared, and thus cannot be resolved as
+ // an expression. Fall back to annotating it as a type.
+ if (TryAnnotateTypeOrScopeTokenAfterScopeSpec(EnteringContext, false, SS,
+ !WasScopeAnnotation))
+ return ANK_Error;
+ return Tok.is(tok::annot_typename) ? ANK_Success : ANK_TentativeDecl;
+ }
+
+ Token Next = NextToken();
+
+ // Look up and classify the identifier. We don't perform any typo-correction
+ // after a scope specifier, because in general we can't recover from typos
+ // there (e.g., after correcting 'A::tempalte B<X>::C', we would need to jump
+ // back into scope specifier parsing).
+ Sema::NameClassification Classification
+ = Actions.ClassifyName(getCurScope(), SS, Name, NameLoc, Next,
+ IsAddressOfOperand, SS.isEmpty() ? CCC : 0);
+
+ switch (Classification.getKind()) {
+ case Sema::NC_Error:
+ return ANK_Error;
+
+ case Sema::NC_Keyword:
+ // The identifier was typo-corrected to a keyword.
+ Tok.setIdentifierInfo(Name);
+ Tok.setKind(Name->getTokenID());
+ PP.TypoCorrectToken(Tok);
+ if (SS.isNotEmpty())
+ AnnotateScopeToken(SS, !WasScopeAnnotation);
+ // We've "annotated" this as a keyword.
+ return ANK_Success;
+
+ case Sema::NC_Unknown:
+ // It's not something we know about. Leave it unannotated.
+ break;
+
+ case Sema::NC_Type:
+ Tok.setKind(tok::annot_typename);
+ setTypeAnnotation(Tok, Classification.getType());
+ Tok.setAnnotationEndLoc(NameLoc);
+ if (SS.isNotEmpty())
+ Tok.setLocation(SS.getBeginLoc());
+ PP.AnnotateCachedTokens(Tok);
+ return ANK_Success;
+
+ case Sema::NC_Expression:
+ Tok.setKind(tok::annot_primary_expr);
+ setExprAnnotation(Tok, Classification.getExpression());
+ Tok.setAnnotationEndLoc(NameLoc);
+ if (SS.isNotEmpty())
+ Tok.setLocation(SS.getBeginLoc());
+ PP.AnnotateCachedTokens(Tok);
+ return ANK_Success;
+
+ case Sema::NC_TypeTemplate:
+ if (Next.isNot(tok::less)) {
+ // This may be a type template being used as a template template argument.
+ if (SS.isNotEmpty())
+ AnnotateScopeToken(SS, !WasScopeAnnotation);
+ return ANK_TemplateName;
+ }
+ // Fall through.
+ case Sema::NC_FunctionTemplate: {
+ // We have a type or function template followed by '<'.
+ ConsumeToken();
+ UnqualifiedId Id;
+ Id.setIdentifier(Name, NameLoc);
+ if (AnnotateTemplateIdToken(
+ TemplateTy::make(Classification.getTemplateName()),
+ Classification.getTemplateNameKind(), SS, SourceLocation(), Id))
+ return ANK_Error;
+ return ANK_Success;
+ }
+
+ case Sema::NC_NestedNameSpecifier:
+ llvm_unreachable("already parsed nested name specifier");
+ }
+
+ // Unable to classify the name, but maybe we can annotate a scope specifier.
+ if (SS.isNotEmpty())
+ AnnotateScopeToken(SS, !WasScopeAnnotation);
+ return ANK_Unresolved;
+}
+
/// TryAnnotateTypeOrScopeToken - If the current token position is on a
/// typename (possibly qualified in C++) or a C++ scope specifier not followed
/// by a typename, TryAnnotateTypeOrScopeToken will replace one or more tokens
@@ -1404,13 +1541,24 @@
}
// Remembers whether the token was originally a scope annotation.
- bool wasScopeAnnotation = Tok.is(tok::annot_cxxscope);
+ bool WasScopeAnnotation = Tok.is(tok::annot_cxxscope);
CXXScopeSpec SS;
if (getLangOpts().CPlusPlus)
if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), EnteringContext))
return true;
+ return TryAnnotateTypeOrScopeTokenAfterScopeSpec(EnteringContext, NeedType,
+ SS, !WasScopeAnnotation);
+}
+
+/// \brief Try to annotate a type or scope token, having already parsed an
+/// optional scope specifier. \p IsNewScope should be \c true unless the scope
+/// specifier was extracted from an existing tok::annot_cxxscope annotation.
+bool Parser::TryAnnotateTypeOrScopeTokenAfterScopeSpec(bool EnteringContext,
+ bool NeedType,
+ CXXScopeSpec &SS,
+ bool IsNewScope) {
if (Tok.is(tok::identifier)) {
IdentifierInfo *CorrectedII = 0;
// Determine whether the identifier is a type name.
@@ -1492,21 +1640,7 @@
return false;
// A C++ scope specifier that isn't followed by a typename.
- // Push the current token back into the token stream (or revert it if it is
- // cached) and use an annotation scope token for current token.
- if (PP.isBacktrackEnabled())
- PP.RevertCachedTokens(1);
- else
- PP.EnterToken(Tok);
- Tok.setKind(tok::annot_cxxscope);
- Tok.setAnnotationValue(Actions.SaveNestedNameSpecifierAnnotation(SS));
- Tok.setAnnotationRange(SS.getRange());
-
- // In case the tokens were cached, have Preprocessor replace them
- // with the annotation token. We don't need to do this if we've
- // just reverted back to the state we were in before being called.
- if (!wasScopeAnnotation)
- PP.AnnotateCachedTokens(Tok);
+ AnnotateScopeToken(SS, IsNewScope);
return false;
}
@@ -1529,19 +1663,7 @@
if (SS.isEmpty())
return false;
- // Push the current token back into the token stream (or revert it if it is
- // cached) and use an annotation scope token for current token.
- if (PP.isBacktrackEnabled())
- PP.RevertCachedTokens(1);
- else
- PP.EnterToken(Tok);
- Tok.setKind(tok::annot_cxxscope);
- Tok.setAnnotationValue(Actions.SaveNestedNameSpecifierAnnotation(SS));
- Tok.setAnnotationRange(SS.getRange());
-
- // In case the tokens were cached, have Preprocessor replace them with the
- // annotation token.
- PP.AnnotateCachedTokens(Tok);
+ AnnotateScopeToken(SS, true);
return false;
}