Implement a new identifier-classification scheme where Sema performs name lookup for an identifier and resolves it to a type/expression/template/etc. in the same step. This scheme is intended to improve both performance (by reducing the number of redundant name lookups for a given identifier token) and error recovery (by giving Sema a chance to correct type names before the parser has decided that the identifier isn't a type name). For example, this allows us to properly typo-correct type names at the beginning of a statement:

  t.c:6:3: error: use of undeclared identifier 'integer'; did you mean 'Integer'?
    integer *i = 0;
    ^~~~~~~
    Integer
  t.c:1:13: note: 'Integer' declared here
  typedef int Integer;
              ^

Previously, we wouldn't give a Fix-It because the typo correction occurred after the parser had checked whether "integer" was a type name (via Sema::getTypeName(), which isn't allowed to typo-correct) and therefore decided to parse "integer * i = 0" as an expression. By typo-correcting earlier, we typo-correct to the type name Integer and parse this as a declaration.

Moreover, in this context, we can also typo-correct identifiers to keywords, e.g.,

  t.c:7:3: error: use of undeclared identifier 'vid'; did you mean 'void'?
    vid *p = i;
    ^~~
    void

and recover appropriately.

Note that this is very much a work-in-progress. The new Sema::ClassifyName is only used for expression-or-declaration disambiguation in C at the statement level. The next steps will be to make this work for the same disambiguation in C++ (where functional-style casts make some trouble), then push it further into the parser to eliminate more redundant name lookups.

Fixes <rdar://problem/7963833> for C and starts us down the path of <rdar://problem/8172000>.

llvm-svn: 130082

commit: 0e7dde535d5312a67038bff3d1ed31f085f31e02 [log] [tgz]
author: Douglas Gregor <dgregor@apple.com> Sun Apr 24 05:37:28 2011 +0000
committer: Douglas Gregor <dgregor@apple.com> Sun Apr 24 05:37:28 2011 +0000
tree: 9bdfbaefa433f093b88c95ac03ca1fdd4ac6ca0c
parent: d8d1e2ae665ccbd205df9cc16933227541e4e3b7 [diff] [blame]
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index 74c2472..8a03864a 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp

@@ -22,6 +22,10 @@
 #include "clang/Basic/SourceManager.h"
 using namespace clang;
 
+static bool isColonOrRSquareBracket(const Token &Tok) {
+  return Tok.is(tok::colon) || Tok.is(tok::r_square);
+}
+
 //===----------------------------------------------------------------------===//
 // C99 6.8: Statements and Blocks.
 //===----------------------------------------------------------------------===//
@@ -87,6 +91,7 @@
   // Cases in this switch statement should fall through if the parser expects
   // the token to end in a semicolon (in which case SemiError should be set),
   // or they directly 'return;' if not.
+Retry:
   tok::TokenKind Kind  = Tok.getKind();
   SourceLocation AtLoc;
   switch (Kind) {
@@ -101,13 +106,134 @@
     ConsumeCodeCompletionToken();
     return ParseStatementOrDeclaration(Stmts, OnlyStatement);
       
-  case tok::identifier:
-    if (NextToken().is(tok::colon)) { // C99 6.8.1: labeled-statement
+  case tok::identifier: {
+    Token Next = NextToken();
+    if (Next.is(tok::colon)) { // C99 6.8.1: labeled-statement
       // identifier ':' statement
       return ParseLabeledStatement(attrs);
     }
-    // PASS THROUGH.
+    
+    if (!getLang().CPlusPlus) {
+      // FIXME: Temporarily enable this code only for C.
+      CXXScopeSpec SS;
+      IdentifierInfo *Name = Tok.getIdentifierInfo();
+      SourceLocation NameLoc = Tok.getLocation();
+      Sema::NameClassification Classification
+        = Actions.ClassifyName(getCurScope(), SS, Name, NameLoc, Next);
+      switch (Classification.getKind()) {
+      case Sema::NC_Keyword:
+        // The identifier was corrected to a keyword. Update the token
+        // to this keyword, and try again.
+        if (Name->getTokenID() != tok::identifier) {
+          Tok.setIdentifierInfo(Name);
+          Tok.setKind(Name->getTokenID());
+          goto Retry;
+        }
+          
+        // Fall through via the normal error path.
+        // FIXME: This seems like it could only happen for context-sensitive
+        // keywords.
+          
+      case Sema::NC_Error:
+        // Handle errors here by skipping up to the next semicolon or '}', and
+        // eat the semicolon if that's what stopped us.
+        SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+        if (Tok.is(tok::semi))
+          ConsumeToken();
+        return StmtError();
+                     
+      case Sema::NC_Unknown:
+        // Either we don't know anything about this identifier, or we know that
+        // we're in a syntactic context we haven't handled yet. 
+        break;     
+          
+      case Sema::NC_Type:
+        // We have a type.
+        // We have a type. In C, this means that we have a declaration.
+        if (!getLang().CPlusPlus) {
+          ParsedType Type = Classification.getType();
+          const char *PrevSpec = 0;
+          unsigned DiagID;
+          ConsumeToken(); // the identifier
+          ParsingDeclSpec DS(*this);
+          DS.takeAttributesFrom(attrs);
+          DS.SetTypeSpecType(DeclSpec::TST_typename, NameLoc, PrevSpec, DiagID,
+                             Type);
+          DS.SetRangeStart(NameLoc);
+          DS.SetRangeEnd(NameLoc);
+          
+          // In Objective-C, check whether this is the start of a class message
+          // send that is missing an opening square bracket ('[').
+          if (getLang().ObjC1 && Tok.is(tok::identifier) && 
+              Type.get()->isObjCObjectOrInterfaceType() &&
+              isColonOrRSquareBracket(NextToken())) {
+            // Fake up a Declarator to use with ActOnTypeName.
+            Declarator DeclaratorInfo(DS, Declarator::TypeNameContext);
+            TypeResult Ty = Actions.ActOnTypeName(getCurScope(), DeclaratorInfo);
+            if (Ty.isInvalid()) {
+              SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+              if (Tok.is(tok::semi))
+                ConsumeToken();
+              return StmtError();
+            }
+            
+            ExprResult MsgExpr = ParseObjCMessageExpressionBody(SourceLocation(), 
+                                                                SourceLocation(),
+                                                                Ty.get(), 0);
+            return ParseExprStatement(attrs, MsgExpr);
+          }
+          
+          // Objective-C supports syntax of the form 'id<proto1,proto2>' where 
+          // 'id' is a specific typedef and 'itf<proto1,proto2>' where 'itf' is 
+          // an Objective-C interface. 
+          if (Tok.is(tok::less) && getLang().ObjC1)
+            ParseObjCProtocolQualifiers(DS);
 
+          SourceLocation DeclStart = NameLoc, DeclEnd;
+          DeclGroupPtrTy Decl = ParseSimpleDeclaration(DS, Stmts, 
+                                                       Declarator::BlockContext,
+                                                       DeclEnd, true);
+          return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd);
+        }
+          
+        // In C++, we might also have a functional-style cast.
+        // FIXME: Implement this!
+        break;
+          
+      case Sema::NC_Expression:
+        ConsumeToken(); // the identifier
+        return ParseExprStatement(attrs, Classification.getExpression());
+          
+      case Sema::NC_TypeTemplate:
+      case Sema::NC_FunctionTemplate: {
+        ConsumeToken(); // the identifier
+        UnqualifiedId Id;
+        Id.setIdentifier(Name, NameLoc);
+        if (AnnotateTemplateIdToken(
+                            TemplateTy::make(Classification.getTemplateName()), 
+                                    Classification.getTemplateNameKind(),
+                                    SS, Id)) {
+          // Handle errors here by skipping up to the next semicolon or '}', and
+          // eat the semicolon if that's what stopped us.
+          SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+          if (Tok.is(tok::semi))
+            ConsumeToken();
+          return StmtError();        
+        }
+        
+        // We've annotated a template-id, so try again now.
+        goto Retry;
+      }
+       
+      case Sema::NC_NestedNameSpecifier:
+        // FIXME: Implement this!
+        break;
+      }
+    }
+    
+    // Fall through
+  }
+      
   default: {
     if ((getLang().CPlusPlus || !OnlyStatement) && isDeclarationStatement()) {
       SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
@@ -121,36 +247,7 @@
       return StmtError();
     }
 
-    // If a case keyword is missing, this is where it should be inserted.
-    Token OldToken = Tok;
-
-    // FIXME: Use the attributes
-    // expression[opt] ';'
-    ExprResult Expr(ParseExpression());
-    if (Expr.isInvalid()) {
-      // If the expression is invalid, skip ahead to the next semicolon or '}'.
-      // Not doing this opens us up to the possibility of infinite loops if
-      // ParseExpression does not consume any tokens.
-      SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
-      if (Tok.is(tok::semi))
-        ConsumeToken();
-      return StmtError();
-    }
-
-    if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
-        Actions.CheckCaseExpression(Expr.get())) {
-      // If a constant expression is followed by a colon inside a switch block,
-      // suggest a missing case keywork.
-      Diag(OldToken, diag::err_expected_case_before_expression)
-          << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
-
-      // Recover parsing as a case statement.
-      return ParseCaseStatement(attrs, /*MissingCase=*/true, Expr);
-    }
-
-    // Otherwise, eat the semicolon.
-    ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
-    return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+    return ParseExprStatement(attrs, ExprResult());
   }
 
   case tok::kw_case:                // C99 6.8.1: labeled-statement
@@ -225,6 +322,42 @@
   return move(Res);
 }
 
+/// \brief Parse an expression statement.
+StmtResult Parser::ParseExprStatement(ParsedAttributes &Attrs, 
+                                      ExprResult Primary) {
+  // If a case keyword is missing, this is where it should be inserted.
+  Token OldToken = Tok;
+  
+  // FIXME: Use the attributes
+  // expression[opt] ';'
+  ExprResult Expr(ParseExpression(Primary));
+  if (Expr.isInvalid()) {
+    // If the expression is invalid, skip ahead to the next semicolon or '}'.
+    // Not doing this opens us up to the possibility of infinite loops if
+    // ParseExpression does not consume any tokens.
+    SkipUntil(tok::r_brace, /*StopAtSemi=*/true, /*DontConsume=*/true);
+    if (Tok.is(tok::semi))
+      ConsumeToken();
+    return StmtError();
+  }
+  
+  if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() &&
+      Actions.CheckCaseExpression(Expr.get())) {
+    // If a constant expression is followed by a colon inside a switch block,
+    // suggest a missing case keyword.
+    Diag(OldToken, diag::err_expected_case_before_expression)
+      << FixItHint::CreateInsertion(OldToken.getLocation(), "case ");
+    
+    // Recover parsing as a case statement.
+    return ParseCaseStatement(Attrs, /*MissingCase=*/true, Expr);
+  }
+  
+  // Otherwise, eat the semicolon.
+  ExpectAndConsumeSemi(diag::err_expected_semi_after_expr);
+  return Actions.ActOnExprStmt(Actions.MakeFullExpr(Expr.get()));
+
+}
+
 /// ParseLabeledStatement - We have an identifier and a ':' after it.
 ///
 ///       labeled-statement:
commit	0e7dde535d5312a67038bff3d1ed31f085f31e02	[log] [tgz]
author	Douglas Gregor <dgregor@apple.com>	Sun Apr 24 05:37:28 2011 +0000
committer	Douglas Gregor <dgregor@apple.com>	Sun Apr 24 05:37:28 2011 +0000
tree	9bdfbaefa433f093b88c95ac03ca1fdd4ac6ca0c
parent	d8d1e2ae665ccbd205df9cc16933227541e4e3b7 [diff] [blame]