Implement a new identifier-classification scheme where Sema performs
name lookup for an identifier and resolves it to a
type/expression/template/etc. in the same step. This scheme is intended
to improve both performance (by reducing the number of redundant name
lookups for a given identifier token) and error recovery (by giving Sema
a chance to correct type names before the parser has decided that the
identifier isn't a type name). For example, this allows us to properly
typo-correct type names at the beginning of a statement:

t.c:6:3: error: use of undeclared identifier 'integer'; did you mean 'Integer'?
  integer *i = 0;
  ^~~~~~~
  Integer
t.c:1:13: note: 'Integer' declared here
typedef int Integer;
            ^

Previously, we wouldn't give a Fix-It because the typo correction
occurred after the parser had checked whether "integer" was a type name
(via Sema::getTypeName(), which isn't allowed to typo-correct) and
therefore decided to parse "integer * i = 0" as an expression. By
typo-correcting earlier, we typo-correct to the type name Integer and
parse this as a declaration.

Moreover, in this context, we can also typo-correct identifiers to
keywords, e.g.,

t.c:7:3: error: use of undeclared identifier 'vid'; did you mean 'void'?
  vid *p = i;
  ^~~
  void

and recover appropriately.

Note that this is very much a work-in-progress. The new
Sema::ClassifyName is only used for expression-or-declaration
disambiguation in C at the statement level. The next steps will be to
make this work for the same disambiguation in C++ (where
functional-style casts make some trouble), then push it further into the
parser to eliminate more redundant name lookups.

Fixes <rdar://problem/7963833> for C and starts us down the path of
<rdar://problem/8172000>.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@130082 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 312eadb832cab4497a069409954500d8192b8f0d [log] [tgz]
author: Douglas Gregor <dgregor@apple.com> Sun Apr 24 05:37:28 2011 +0000
committer: Douglas Gregor <dgregor@apple.com> Sun Apr 24 05:37:28 2011 +0000
tree: f919e5f5b0f8f0e2092ee948b86f92a7171697a2
parent: 1b6005285e234bc30698917b2d3abb2f1f98bc77 [diff]
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 0b06d6a..c09a93a 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp

@@ -724,23 +724,46 @@
   // Parse the common declaration-specifiers piece.
   ParsingDeclSpec DS(*this);
   DS.takeAttributesFrom(attrs);
+  return ParseSimpleDeclaration(DS, Stmts, Context, DeclEnd, RequireSemi, FRI);
+}
+
+///       simple-declaration: [C99 6.7: declaration] [C++ 7p1: dcl.dcl]
+///         declaration-specifiers init-declarator-list[opt] ';'
+///[C90/C++]init-declarator-list ';'                             [TODO]
+/// [OMP]   threadprivate-directive                              [TODO]
+///
+///       for-range-declaration: [C++0x 6.5p1: stmt.ranged]
+///         attribute-specifier-seq[opt] type-specifier-seq declarator
+///
+/// If RequireSemi is false, this does not check for a ';' at the end of the
+/// declaration.  If it is true, it checks for and eats it.
+///
+/// If FRI is non-null, we might be parsing a for-range-declaration instead
+/// of a simple-declaration. If we find that we are, we also parse the
+/// for-range-initializer, and place it here.
+Parser::DeclGroupPtrTy Parser::ParseSimpleDeclaration(ParsingDeclSpec &DS,
+                                                      StmtVector &Stmts,
+                                                      unsigned Context,
+                                                      SourceLocation &DeclEnd,
+                                                      bool RequireSemi,
+                                                      ForRangeInit *FRI) {
   ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS_none,
                              getDeclSpecContextFromDeclaratorContext(Context));
   StmtResult R = Actions.ActOnVlaStmt(DS);
   if (R.isUsable())
     Stmts.push_back(R.release());
-
+  
   // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
   // declaration-specifiers init-declarator-list[opt] ';'
   if (Tok.is(tok::semi)) {
     if (RequireSemi) ConsumeToken();
     Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
-                                                           DS);
+                                                       DS);
     DS.complete(TheDecl);
     return Actions.ConvertDeclToDeclGroup(TheDecl);
   }
-
-  return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI);
+  
+  return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI);  
 }
 
 /// ParseDeclGroup - Having concluded that this is either a function
@@ -1254,9 +1277,12 @@
 void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
                                         const ParsedTemplateInfo &TemplateInfo,
                                         AccessSpecifier AS,
-                                        DeclSpecContext DSContext) {  
-  DS.SetRangeStart(Tok.getLocation());
-  DS.SetRangeEnd(Tok.getLocation());
+                                        DeclSpecContext DSContext) { 
+  if (DS.getSourceRange().isInvalid()) {
+    DS.SetRangeStart(Tok.getLocation());
+    DS.SetRangeEnd(Tok.getLocation());
+  }
+  
   while (1) {
     bool isInvalid = false;
     const char *PrevSpec = 0;

diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 0ad153f..cd8f9c5 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp

@@ -175,8 +175,10 @@
 ///         assignment-expression ...[opt]
 ///         expression ',' assignment-expression ...[opt]
 ///
-ExprResult Parser::ParseExpression() {
-  ExprResult LHS(ParseAssignmentExpression());
+/// \param Primary if non-empty, an already-parsed expression that will be used
+/// as the first primary expression.
+ExprResult Parser::ParseExpression(ExprResult Primary) {
+  ExprResult LHS(ParseAssignmentExpression(Primary));
   return ParseRHSOfBinaryExpression(move(LHS), prec::Comma);
 }
 
@@ -213,16 +215,26 @@
 
 /// ParseAssignmentExpression - Parse an expr that doesn't include commas.
 ///
-ExprResult Parser::ParseAssignmentExpression() {
+/// \param Primary if non-empty, an already-parsed expression that will be used
+/// as the first primary expression.
+ExprResult Parser::ParseAssignmentExpression(ExprResult Primary) {
   if (Tok.is(tok::code_completion)) {
-    Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression);
+    if (Primary.isUsable())
+      Actions.CodeCompletePostfixExpression(getCurScope(), Primary);
+    else
+      Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression);
     ConsumeCodeCompletionToken();
   }
 
-  if (Tok.is(tok::kw_throw))
+  if (!Primary.isUsable() && Tok.is(tok::kw_throw))
     return ParseThrowExpression();
 
-  ExprResult LHS(ParseCastExpression(false));
+  ExprResult LHS;
+  if (Primary.get() || Primary.isInvalid())
+    LHS = ParsePostfixExpressionSuffix(Primary);
+  else
+    LHS = ParseCastExpression(false, false, ParsedType());
+  
   return ParseRHSOfBinaryExpression(move(LHS), prec::Assignment);
 }
 
@@ -415,8 +427,8 @@
 /// due to member pointers.
 ///
 ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
-                                                     bool isAddressOfOperand,
-                                                     ParsedType TypeOfCast) {
+                                       bool isAddressOfOperand,
+                                       ParsedType TypeOfCast) {
   bool NotCastExpr;
   ExprResult Res = ParseCastExpression(isUnaryExpression,
                                        isAddressOfOperand,

diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index 74c2472..8a03864 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp

@@ -22,6 +22,10 @@
 #include "clang/Basic/SourceManager.h"
 using namespace clang;
 
+static bool isColonOrRSquareBracket(const Token &Tok) {
+  return Tok.is(tok::colon) || Tok.is(tok::r_square);
+}
+
 //===----------------------------------------------------------------------===//
 // C99 6.8: Statements and Blocks.
 //===----------------------------------------------------------------------===//
@@ -87,6 +91,7 @@
   // Cases in this switch statement should fall through if the parser expects
   // the token to end in a semicolon (in which case SemiError should be set),
   // or they directly 'return;' if not.
+Retry:
   tok::TokenKind Kind  = Tok.getKind();
   SourceLocation AtLoc;
   switch (Kind) {
@@ -101,13 +106,134 @@
     ConsumeCodeCompletionToken();
     return ParseStatementOrDeclaration(Stmts, OnlyStatement);
       
-  case tok::identifier:
-    if (NextToken().is(tok::colon)) { // C99 6.8.1: labeled-statement
+  case tok::identifier: {
+    Token Next = NextToken();
+    if (Next.is(tok::colon)) { // C99 6.8.1: labeled-statement
       // identifier ':' statement
       return ParseLabeledStatement(attrs);
     }
-    // PASS THROUGH.
+    
+    if (!getLang().CPlusPlus) {
+      // FIXME: Temporarily enable this code only for C.
+      CXXScopeSpec SS;
+      IdentifierInfo *Name = Tok.getIdentifierInfo();
+      SourceLocation NameLoc = Tok.getLocation();
+      Sema::NameClassification Classification
+        = Actions.ClassifyName(getCurScope(), SS, Name, NameLoc, Next);
+      switch (Classification.getKind()) {
+      case Sema::NC_Keyword:
+        // The identifier was corrected to a keyword. Update the token
+        // to this keyword, and try again.
+        if (Name->getTokenID() != tok::identifier) {
+          Tok.setIdentifierInfo(Name);
+          Tok.setKind(Name->getTokenID());
+          goto Retry;
+        }
+          
+        // Fall through via the normal error path.
+        // FIXME: This seems like it could only happen for context-sensitive
+        // keywords.
+          
+      case Sema::NC_Error:
+        // Handle errors here by skipping up to the next semicolon or '}', and
+        // eat the semicolon if that's what stopped us.
+        SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+        if (Tok.is(tok::semi))
+          ConsumeToken();
+        return StmtError();
+                     
+      case Sema::NC_Unknown:
+        // Either we don't know anything about this identifier, or we know that
+        // we're in a syntactic context we haven't handled yet. 
+        break;     
+          
+      case Sema::NC_Type:
+        // We have a type.
+        // We have a type. In C, this means that we have a declaration.
+        if (!getLang().CPlusPlus) {
+          ParsedType Type = Classification.getType();
+          const char *PrevSpec = 0;
+          unsigned DiagID;
+          ConsumeToken(); // the identifier
+          ParsingDeclSpec DS(*this);
+          DS.takeAttributesFrom(attrs);
+          DS.SetTypeSpecType(DeclSpec::TST_typename, NameLoc, PrevSpec, DiagID,
+                             Type);
+          DS.SetRangeStart(NameLoc);
+          DS.SetRangeEnd(NameLoc);
+          
+          // In Objective-C, check whether this is the start of a class message
+          // send that is missing an opening square bracket ('[').
+          if (getLang().ObjC1 && Tok.is(tok::identifier) && 
+              Type.get()->isObjCObjectOrInterfaceType() &&
+              isColonOrRSquareBracket(NextToken())) {
+            // Fake up a Declarator to use with ActOnTypeName.
+            Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+            TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
+            if (Ty.isInvalid()) {
+              SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+              if (Tok.is(tok::semi))
+                ConsumeToken();
+              return StmtError();
+            }
+            
+            ExprResult MsgExpr = ParseObjCMessageExpressionBody(SourceLocation(), 
+                                                                SourceLocation(),
+                                                                Ty.get(), 0);
+            return ParseExprStatement(attrs, MsgExpr);
+          }
+          
+          // Objective-C supports syntax of the form 'id<proto1,proto2>' where 
+          // 'id' is a specific typedef and 'itf<proto1,proto2>' where 'itf' is 
+          // an Objective-C interface. 
+          if (Tok.is(tok::less) && getLang().ObjC1)
+            ParseObjCProtocolQualifiers(DS);
 
+          SourceLocation DeclStart = NameLoc, DeclEnd;
+          DeclGroupPtrTy Decl = ParseSimpleDeclaration(DS, Stmts, 
+                                                       Declarator::BlockContext,
+                                                       DeclEnd, true);
+          return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd);
+        }
+          
+        // In C++, we might also have a functional-style cast.
+        // FIXME: Implement this!
+        break;
+          
+      case Sema::NC_Expression:
+        ConsumeToken(); // the identifier
+        return ParseExprStatement(attrs, Classification.getExpression());
+          
+      case Sema::NC_TypeTemplate:
+      case Sema::NC_FunctionTemplate: {
+        ConsumeToken(); // the identifier
+        UnqualifiedId Id;
+        Id.setIdentifier(Name, NameLoc);
+        if (AnnotateTemplateIdToken(
+                            TemplateTy::make(Classification.getTemplateName()), 
+                                    Classification.getTemplateNameKind(),
+                                    SS, Id)) {
+          // Handle errors here by skipping up to the next semicolon or '}', and
+          // eat the semicolon if that's what stopped us.
+          SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+          if (Tok.is(tok::semi))
+            ConsumeToken();
+          return StmtError();        
+        }
+        
+        // We've annotated a template-id, so try again now.
+        goto Retry;
+      }
+       
+      case Sema::NC_NestedNameSpecifier:
+        // FIXME: Implement this!
+        break;
+      }
+    }
+    
+    // Fall through
+  }
+      
   default: {
     if ((getLang().CPlusPlus || !OnlyStatement) && isDeclarationStatement()) {
       SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
@@ -121,36 +247,7 @@
       return StmtError();
     }
 
-    // If a case keyword is missing, this is where it should be inserted.
-    Token OldToken = Tok;
-
-    // FIXME: Use the attributes
-    // expression[opt] ';'
-    ExprResult Expr(ParseExpression());
-    if (Expr.isInvalid()) {
-      // If the expression is invalid, skip ahead to the next semicolon or '}'.
-      // Not doing this opens us up to the possibility of infinite loops if
-      // ParseExpression does not consume any tokens.
-      SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
-      if (Tok.is(tok::semi))
-        ConsumeToken();
-      return StmtError();
-    }
-
-    if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
-        Actions.CheckCaseExpression(Expr.get())) {
-      // If a constant expression is followed by a colon inside a switch block,
-      // suggest a missing case keywork.
-      Diag(OldToken, diag::err_expected_case_before_expression)
-          << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
-
-      // Recover parsing as a case statement.
-      return ParseCaseStatement(attrs, /*MissingCase=*/true, Expr);
-    }
-
-    // Otherwise, eat the semicolon.
-    ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
-    return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+    return ParseExprStatement(attrs, ExprResult());
   }
 
   case tok::kw_case:                // C99 6.8.1: labeled-statement
@@ -225,6 +322,42 @@
   return move(Res);
 }
 
+/// \brief Parse an expression statement.
+StmtResult Parser::ParseExprStatement(ParsedAttributes &Attrs, 
+                                      ExprResult Primary) {
+  // If a case keyword is missing, this is where it should be inserted.
+  Token OldToken = Tok;
+  
+  // FIXME: Use the attributes
+  // expression[opt] ';'
+  ExprResult Expr(ParseExpression(Primary));
+  if (Expr.isInvalid()) {
+    // If the expression is invalid, skip ahead to the next semicolon or '}'.
+    // Not doing this opens us up to the possibility of infinite loops if
+    // ParseExpression does not consume any tokens.
+    SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+    if (Tok.is(tok::semi))
+      ConsumeToken();
+    return StmtError();
+  }
+  
+  if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
+      Actions.CheckCaseExpression(Expr.get())) {
+    // If a constant expression is followed by a colon inside a switch block,
+    // suggest a missing case keyword.
+    Diag(OldToken, diag::err_expected_case_before_expression)
+      << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
+    
+    // Recover parsing as a case statement.
+    return ParseCaseStatement(Attrs, /*MissingCase=*/true, Expr);
+  }
+  
+  // Otherwise, eat the semicolon.
+  ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
+  return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+
+}
+
 /// ParseLabeledStatement - We have an identifier and a ':' after it.
 ///
 ///       labeled-statement:
commit	312eadb832cab4497a069409954500d8192b8f0d	[log] [tgz]
author	Douglas Gregor <dgregor@apple.com>	Sun Apr 24 05:37:28 2011 +0000
committer	Douglas Gregor <dgregor@apple.com>	Sun Apr 24 05:37:28 2011 +0000
tree	f919e5f5b0f8f0e2092ee948b86f92a7171697a2
parent	1b6005285e234bc30698917b2d3abb2f1f98bc77 [diff]