Implement a new identifier-classification scheme where Sema
performs name lookup for an identifier and resolves it to a
type/expression/template/etc. in the same step. This scheme is
intended to improve both performance (by reducing the number of
redundant name lookups for a given identifier token) and error
recovery (by giving Sema a chance to correct type names before the
parser has decided that the identifier isn't a type name). For
example, this allows us to properly typo-correct type names at the
beginning of a statement:
  t.c:6:3: error: use of undeclared identifier 'integer'; did you mean
        'Integer'?
    integer *i = 0;
    ^~~~~~~
    Integer
  t.c:1:13: note: 'Integer' declared here
  typedef int Integer;
              ^
Previously, we wouldn't give a Fix-It because the typo correction
occurred after the parser had checked whether "integer" was a type
name (via Sema::getTypeName(), which isn't allowed to typo-correct)
and therefore decided to parse "integer * i = 0" as an expression. By
typo-correcting earlier, we typo-correct to the type name Integer and
parse this as a declaration.
Moreover, in this context, we can also typo-correct identifiers to
keywords, e.g.,
  t.c:7:3: error: use of undeclared identifier 'vid'; did you mean
        'void'?
    vid *p = i;
    ^~~
    void
and recover appropriately.
Note that this is very much a work-in-progress. The new
Sema::ClassifyName is only used for expression-or-declaration
disambiguation in C at the statement level. The next steps will be to
make this work for the same disambiguation in C++ (where
functional-style casts cause some trouble), then push it
further into the parser to eliminate more redundant name lookups.
Fixes <rdar://problem/7963833> for C and starts us down the path of
<rdar://problem/8172000>.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@130082 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 0b06d6a..c09a93a 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -724,23 +724,46 @@
// Parse the common declaration-specifiers piece.
ParsingDeclSpec DS(*this);
DS.takeAttributesFrom(attrs);
+ return ParseSimpleDeclaration(DS, Stmts, Context, DeclEnd, RequireSemi, FRI);
+}
+
+/// simple-declaration: [C99 6.7: declaration] [C++ 7p1: dcl.dcl]
+/// declaration-specifiers init-declarator-list[opt] ';'
+///[C90/C++]init-declarator-list ';' [TODO]
+/// [OMP] threadprivate-directive [TODO]
+///
+/// for-range-declaration: [C++0x 6.5p1: stmt.ranged]
+/// attribute-specifier-seq[opt] type-specifier-seq declarator
+///
+/// If RequireSemi is false, this does not check for a ';' at the end of the
+/// declaration. If it is true, it checks for and eats it.
+///
+/// If FRI is non-null, we might be parsing a for-range-declaration instead
+/// of a simple-declaration. If we find that we are, we also parse the
+/// for-range-initializer, and place it here.
+Parser::DeclGroupPtrTy Parser::ParseSimpleDeclaration(ParsingDeclSpec &DS,
+ StmtVector &Stmts,
+ unsigned Context,
+ SourceLocation &DeclEnd,
+ bool RequireSemi,
+ ForRangeInit *FRI) {
ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS_none,
getDeclSpecContextFromDeclaratorContext(Context));
StmtResult R = Actions.ActOnVlaStmt(DS);
if (R.isUsable())
Stmts.push_back(R.release());
-
+
// C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
// declaration-specifiers init-declarator-list[opt] ';'
if (Tok.is(tok::semi)) {
if (RequireSemi) ConsumeToken();
Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
- DS);
+ DS);
DS.complete(TheDecl);
return Actions.ConvertDeclToDeclGroup(TheDecl);
}
-
- return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI);
+
+ return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI);
}
/// ParseDeclGroup - Having concluded that this is either a function
@@ -1254,9 +1277,12 @@
void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
const ParsedTemplateInfo &TemplateInfo,
AccessSpecifier AS,
- DeclSpecContext DSContext) {
- DS.SetRangeStart(Tok.getLocation());
- DS.SetRangeEnd(Tok.getLocation());
+ DeclSpecContext DSContext) {
+ if (DS.getSourceRange().isInvalid()) {
+ DS.SetRangeStart(Tok.getLocation());
+ DS.SetRangeEnd(Tok.getLocation());
+ }
+
while (1) {
bool isInvalid = false;
const char *PrevSpec = 0;
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 0ad153f..cd8f9c5 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -175,8 +175,10 @@
/// assignment-expression ...[opt]
/// expression ',' assignment-expression ...[opt]
///
-ExprResult Parser::ParseExpression() {
- ExprResult LHS(ParseAssignmentExpression());
+/// \param Primary if non-empty, an already-parsed expression that will be used
+/// as the first primary expression.
+ExprResult Parser::ParseExpression(ExprResult Primary) {
+ ExprResult LHS(ParseAssignmentExpression(Primary));
return ParseRHSOfBinaryExpression(move(LHS), prec::Comma);
}
@@ -213,16 +215,26 @@
/// ParseAssignmentExpression - Parse an expr that doesn't include commas.
///
-ExprResult Parser::ParseAssignmentExpression() {
+/// \param Primary if non-empty, an already-parsed expression that will be used
+/// as the first primary expression.
+ExprResult Parser::ParseAssignmentExpression(ExprResult Primary) {
if (Tok.is(tok::code_completion)) {
- Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression);
+ if (Primary.isUsable())
+ Actions.CodeCompletePostfixExpression(getCurScope(), Primary);
+ else
+ Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression);
ConsumeCodeCompletionToken();
}
- if (Tok.is(tok::kw_throw))
+ if (!Primary.isUsable() && Tok.is(tok::kw_throw))
return ParseThrowExpression();
- ExprResult LHS(ParseCastExpression(false));
+ ExprResult LHS;
+ if (Primary.get() || Primary.isInvalid())
+ LHS = ParsePostfixExpressionSuffix(Primary);
+ else
+ LHS = ParseCastExpression(false, false, ParsedType());
+
return ParseRHSOfBinaryExpression(move(LHS), prec::Assignment);
}
@@ -415,8 +427,8 @@
/// due to member pointers.
///
ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
- bool isAddressOfOperand,
- ParsedType TypeOfCast) {
+ bool isAddressOfOperand,
+ ParsedType TypeOfCast) {
bool NotCastExpr;
ExprResult Res = ParseCastExpression(isUnaryExpression,
isAddressOfOperand,
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index 74c2472..8a03864 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -22,6 +22,10 @@
#include "clang/Basic/SourceManager.h"
using namespace clang;
+static bool isColonOrRSquareBracket(const Token &Tok) {
+ return Tok.is(tok::colon) || Tok.is(tok::r_square);
+}
+
//===----------------------------------------------------------------------===//
// C99 6.8: Statements and Blocks.
//===----------------------------------------------------------------------===//
@@ -87,6 +91,7 @@
// Cases in this switch statement should fall through if the parser expects
// the token to end in a semicolon (in which case SemiError should be set),
// or they directly 'return;' if not.
+Retry:
tok::TokenKind Kind = Tok.getKind();
SourceLocation AtLoc;
switch (Kind) {
@@ -101,13 +106,134 @@
ConsumeCodeCompletionToken();
return ParseStatementOrDeclaration(Stmts, OnlyStatement);
- case tok::identifier:
- if (NextToken().is(tok::colon)) { // C99 6.8.1: labeled-statement
+ case tok::identifier: {
+ Token Next = NextToken();
+ if (Next.is(tok::colon)) { // C99 6.8.1: labeled-statement
// identifier ':' statement
return ParseLabeledStatement(attrs);
}
- // PASS THROUGH.
+
+ if (!getLang().CPlusPlus) {
+ // FIXME: Temporarily enable this code only for C.
+ CXXScopeSpec SS;
+ IdentifierInfo *Name = Tok.getIdentifierInfo();
+ SourceLocation NameLoc = Tok.getLocation();
+ Sema::NameClassification Classification
+ = Actions.ClassifyName(getCurScope(), SS, Name, NameLoc, Next);
+ switch (Classification.getKind()) {
+ case Sema::NC_Keyword:
+ // The identifier was corrected to a keyword. Update the token
+ // to this keyword, and try again.
+ if (Name->getTokenID() != tok::identifier) {
+ Tok.setIdentifierInfo(Name);
+ Tok.setKind(Name->getTokenID());
+ goto Retry;
+ }
+
+ // Fall through via the normal error path.
+ // FIXME: This seems like it could only happen for context-sensitive
+ // keywords.
+
+ case Sema::NC_Error:
+ // Handle errors here by skipping up to the next semicolon or '}', and
+ // eat the semicolon if that's what stopped us.
+ SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+ if (Tok.is(tok::semi))
+ ConsumeToken();
+ return StmtError();
+
+ case Sema::NC_Unknown:
+ // Either we don't know anything about this identifier, or we know that
+ // we're in a syntactic context we haven't handled yet.
+ break;
+
+ case Sema::NC_Type:
+ // We have a type.
+ // We have a type. In C, this means that we have a declaration.
+ if (!getLang().CPlusPlus) {
+ ParsedType Type = Classification.getType();
+ const char *PrevSpec = 0;
+ unsigned DiagID;
+ ConsumeToken(); // the identifier
+ ParsingDeclSpec DS(*this);
+ DS.takeAttributesFrom(attrs);
+ DS.SetTypeSpecType(DeclSpec::TST_typename, NameLoc, PrevSpec, DiagID,
+ Type);
+ DS.SetRangeStart(NameLoc);
+ DS.SetRangeEnd(NameLoc);
+
+ // In Objective-C, check whether this is the start of a class message
+ // send that is missing an opening square bracket ('[').
+ if (getLang().ObjC1 && Tok.is(tok::identifier) &&
+ Type.get()->isObjCObjectOrInterfaceType() &&
+ isColonOrRSquareBracket(NextToken())) {
+ // Fake up a Declarator to use with ActOnTypeName.
+ Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+ TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
+ if (Ty.isInvalid()) {
+ SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+ if (Tok.is(tok::semi))
+ ConsumeToken();
+ return StmtError();
+ }
+
+ ExprResult MsgExpr = ParseObjCMessageExpressionBody(SourceLocation(),
+ SourceLocation(),
+ Ty.get(), 0);
+ return ParseExprStatement(attrs, MsgExpr);
+ }
+
+ // Objective-C supports syntax of the form 'id<proto1,proto2>' where
+ // 'id' is a specific typedef and 'itf<proto1,proto2>' where 'itf' is
+ // an Objective-C interface.
+ if (Tok.is(tok::less) && getLang().ObjC1)
+ ParseObjCProtocolQualifiers(DS);
+ SourceLocation DeclStart = NameLoc, DeclEnd;
+ DeclGroupPtrTy Decl = ParseSimpleDeclaration(DS, Stmts,
+ Declarator::BlockContext,
+ DeclEnd, true);
+ return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd);
+ }
+
+ // In C++, we might also have a functional-style cast.
+ // FIXME: Implement this!
+ break;
+
+ case Sema::NC_Expression:
+ ConsumeToken(); // the identifier
+ return ParseExprStatement(attrs, Classification.getExpression());
+
+ case Sema::NC_TypeTemplate:
+ case Sema::NC_FunctionTemplate: {
+ ConsumeToken(); // the identifier
+ UnqualifiedId Id;
+ Id.setIdentifier(Name, NameLoc);
+ if (AnnotateTemplateIdToken(
+ TemplateTy::make(Classification.getTemplateName()),
+ Classification.getTemplateNameKind(),
+ SS, Id)) {
+ // Handle errors here by skipping up to the next semicolon or '}', and
+ // eat the semicolon if that's what stopped us.
+ SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+ if (Tok.is(tok::semi))
+ ConsumeToken();
+ return StmtError();
+ }
+
+ // We've annotated a template-id, so try again now.
+ goto Retry;
+ }
+
+ case Sema::NC_NestedNameSpecifier:
+ // FIXME: Implement this!
+ break;
+ }
+ }
+
+ // Fall through
+ }
+
default: {
if ((getLang().CPlusPlus || !OnlyStatement) && isDeclarationStatement()) {
SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
@@ -121,36 +247,7 @@
return StmtError();
}
- // If a case keyword is missing, this is where it should be inserted.
- Token OldToken = Tok;
-
- // FIXME: Use the attributes
- // expression[opt] ';'
- ExprResult Expr(ParseExpression());
- if (Expr.isInvalid()) {
- // If the expression is invalid, skip ahead to the next semicolon or '}'.
- // Not doing this opens us up to the possibility of infinite loops if
- // ParseExpression does not consume any tokens.
- SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
- if (Tok.is(tok::semi))
- ConsumeToken();
- return StmtError();
- }
-
- if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
- Actions.CheckCaseExpression(Expr.get())) {
- // If a constant expression is followed by a colon inside a switch block,
- // suggest a missing case keywork.
- Diag(OldToken, diag::err_expected_case_before_expression)
- << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
-
- // Recover parsing as a case statement.
- return ParseCaseStatement(attrs, /*MissingCase=*/true, Expr);
- }
-
- // Otherwise, eat the semicolon.
- ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
- return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+ return ParseExprStatement(attrs, ExprResult());
}
case tok::kw_case: // C99 6.8.1: labeled-statement
@@ -225,6 +322,42 @@
return move(Res);
}
+/// \brief Parse an expression statement.
+StmtResult Parser::ParseExprStatement(ParsedAttributes &Attrs,
+ ExprResult Primary) {
+ // If a case keyword is missing, this is where it should be inserted.
+ Token OldToken = Tok;
+
+ // FIXME: Use the attributes
+ // expression[opt] ';'
+ ExprResult Expr(ParseExpression(Primary));
+ if (Expr.isInvalid()) {
+ // If the expression is invalid, skip ahead to the next semicolon or '}'.
+ // Not doing this opens us up to the possibility of infinite loops if
+ // ParseExpression does not consume any tokens.
+ SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+ if (Tok.is(tok::semi))
+ ConsumeToken();
+ return StmtError();
+ }
+
+ if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
+ Actions.CheckCaseExpression(Expr.get())) {
+ // If a constant expression is followed by a colon inside a switch block,
+ // suggest a missing case keyword.
+ Diag(OldToken, diag::err_expected_case_before_expression)
+ << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
+
+ // Recover parsing as a case statement.
+ return ParseCaseStatement(Attrs, /*MissingCase=*/true, Expr);
+ }
+
+ // Otherwise, eat the semicolon.
+ ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
+ return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+
+}
+
/// ParseLabeledStatement - We have an identifier and a ':' after it.
///
/// labeled-statement: