Add pedantic warning -Wempty-translation-unit (C11 6.9p1).

In standard C since C89, a 'translation-unit' is syntactically defined to have
at least one "external-declaration", which is either a declaration or a
function definition. In Clang the latter gives us a declaration as well.

The tricky bit about this warning is that our predefines can contain external
declarations (__builtin_va_list and the 128-bit integer types). Therefore our
AST parser now makes sure we have at least one declaration that doesn't come
from the predefines buffer.

Also, remove the bogus old warning about empty source files. That warning
didn't catch source files that only contain comments, and never fired anyway
because of our predefines.

PR12665 and <rdar://problem/9165548>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158085 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td
index a5057c9..9811bab 100644
--- a/include/clang/Basic/DiagnosticParseKinds.td
+++ b/include/clang/Basic/DiagnosticParseKinds.td
@@ -20,7 +20,9 @@
 
 let CategoryName = "Parse Issue" in {
 
-def ext_empty_source_file : Extension<"ISO C forbids an empty source file">;
+def ext_empty_translation_unit : Extension<
+  "ISO C requires a translation unit to contain at least one declaration.">,
+  InGroup<DiagGroup<"empty-translation-unit">>;
 def warn_cxx98_compat_top_level_semi : Warning<
   "extra ';' outside of a function is incompatible with C++98">,
   InGroup<CXX98CompatPedantic>, DefaultIgnore;
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h
index 3164f87..f7fb1f5 100644
--- a/include/clang/Basic/SourceManager.h
+++ b/include/clang/Basic/SourceManager.h
@@ -584,6 +584,9 @@
   /// \brief The file ID for the precompiled preamble there is one.
   FileID PreambleFileID;
 
+  /// \brief The file ID for the preprocessor's predefines.
+  FileID PredefinesFileID;
+
   // Statistics for -print-stats.
   mutable unsigned NumLinearScans, NumBinaryProbes;
 
@@ -628,6 +631,14 @@
     MainFileID = createFileIDForMemBuffer(Buffer);
     return MainFileID;
   }
+  
+  /// \brief Create the FileID for a memory buffer that contains the
+  /// preprocessor's predefines.
+  FileID createPredefinesFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
+    assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
+    PredefinesFileID = createFileIDForMemBuffer(Buffer);
+    return PredefinesFileID;
+  }
 
   //===--------------------------------------------------------------------===//
   // MainFileID creation and querying methods.
@@ -636,6 +647,9 @@
   /// getMainFileID - Returns the FileID of the main source file.
   FileID getMainFileID() const { return MainFileID; }
 
+  /// \brief Returns the FileID of the preprocessor predefines buffer.
+  FileID getPredefinesFileID() const { return PredefinesFileID; }
+
   /// createMainFileID - Create the FileID for the main source file.
   FileID createMainFileID(const FileEntry *SourceFile, 
                           SrcMgr::CharacteristicKind Kind = SrcMgr::C_User) {
@@ -1113,6 +1127,12 @@
     return getFileID(Loc) == getMainFileID();
   }
 
+  /// isFromPredefines - Returns true if the provided SourceLocation is
+  ///   within the preprocessor's predefines buffer.
+  bool isFromPredefines(SourceLocation Loc) const {
+    return getFileID(Loc) == getPredefinesFileID();
+  }
+
   /// isInSystemHeader - Returns if a SourceLocation is in a system header.
   bool isInSystemHeader(SourceLocation Loc) const {
     return getFileCharacteristic(Loc) != SrcMgr::C_User;
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 07d6320..2222e78 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -730,6 +730,9 @@
 public:
   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID);
   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID);
+  DiagnosticBuilder Diag(unsigned DiagID) {
+    return Diag(Tok, DiagID);
+  }
 
 private:
   void SuggestParentheses(SourceLocation Loc, unsigned DK,
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp
index ed920eb..783e073 100644
--- a/lib/Basic/SourceManager.cpp
+++ b/lib/Basic/SourceManager.cpp
@@ -407,6 +407,7 @@
 
 void SourceManager::clearIDTables() {
   MainFileID = FileID();
+  PredefinesFileID = FileID();
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
   SLocEntryLoaded.clear();
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index d387f43..955c39c 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -425,7 +425,7 @@
   llvm::MemoryBuffer *SB =
     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
   assert(SB && "Cannot create predefined source buffer");
-  FileID FID = SourceMgr.createFileIDForMemBuffer(SB);
+  FileID FID = SourceMgr.createPredefinesFileIDForMemBuffer(SB);
   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
 
   // Start parsing the predefines.
diff --git a/lib/Parse/ParseAST.cpp b/lib/Parse/ParseAST.cpp
index d1c2624..3f86c4d 100644
--- a/lib/Parse/ParseAST.cpp
+++ b/lib/Parse/ParseAST.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Parse/ParseAST.h"
+#include "clang/Parse/ParseDiagnostic.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/SemaConsumer.h"
@@ -77,27 +78,50 @@
   S.getPreprocessor().EnterMainSourceFile();
   P.Initialize();
   S.Initialize();
-  
-  if (ExternalASTSource *External = S.getASTContext().getExternalSource())
+
+  // C11 6.9p1 says translation units must have at least one top-level
+  // declaration. C++ doesn't have this restriction. We also don't want to
+  // complain if we have a precompiled header, although technically if the PCH
+  // is empty we should still emit the (pedantic) diagnostic.
+  bool WarnForEmptyTU = !S.getLangOpts().CPlusPlus;
+  if (ExternalASTSource *External = S.getASTContext().getExternalSource()) {
     External->StartTranslationUnit(Consumer);
-  
-  bool Abort = false;
+    WarnForEmptyTU = false;
+  }
+
+  // Clang's predefines contain top-level declarations for things like va_list,
+  // making it hard to tell if the /user's/ translation unit has at least one
+  // top-level declaration. So we parse cautiously, looking for a declaration
+  // that doesn't come from our predefines.
+  // Note that ParseTopLevelDecl returns 'true' at EOF.
+  SourceManager &SM = S.getSourceManager();
   Parser::DeclGroupPtrTy ADecl;
-  
-  while (!P.ParseTopLevelDecl(ADecl)) {  // Not end of file.
-    // If we got a null return and something *was* parsed, ignore it.  This
-    // is due to a top-level semicolon, an action override, or a parse error
-    // skipping something.
+  while (WarnForEmptyTU && !P.ParseTopLevelDecl(ADecl)) {
     if (ADecl) {
-      if (!Consumer->HandleTopLevelDecl(ADecl.get())) {
-        Abort = true;
-        break;
+      if (!Consumer->HandleTopLevelDecl(ADecl.get()))
+        return;
+      if (DeclGroupRef::iterator FirstDecl = ADecl.get().begin()) {
+        SourceLocation DeclLoc = (*FirstDecl)->getLocation();
+        WarnForEmptyTU = SM.isFromPredefines(DeclLoc);
       }
     }
-  };
+  }
 
-  if (Abort)
-    return;
+  // If we ended up seeing EOF before any top-level declarations, emit our
+  // diagnostic. Otherwise, parse the rest of the file normally.
+  if (WarnForEmptyTU) {
+    P.Diag(diag::ext_empty_translation_unit);
+  } else {
+    while (!P.ParseTopLevelDecl(ADecl)) {  // Not end of file.
+      // If we got a null return and something *was* parsed, ignore it.  This
+      // is due to a top-level semicolon, an action override, or a parse error
+      // skipping something.
+      if (ADecl) {
+        if (!Consumer->HandleTopLevelDecl(ADecl.get())) 
+          return;
+      }
+    };
+  }
   
   // Process any TopLevelDecls generated by #pragma weak.
   for (SmallVector<Decl*,2>::iterator
diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp
index 5040714..f0e2b3a 100644
--- a/lib/Parse/Parser.cpp
+++ b/lib/Parse/Parser.cpp
@@ -439,10 +439,6 @@
   // Prime the lexer look-ahead.
   ConsumeToken();
 
-  if (Tok.is(tok::eof) &&
-      !getLangOpts().CPlusPlus)  // Empty source file is an extension in C
-    Diag(Tok, diag::ext_empty_source_file);
-
   // Initialization for Objective-C context sensitive keywords recognition.
   // Referenced in Parser::ParseObjCTypeQualifierList.
   if (getLangOpts().ObjC1) {
diff --git a/test/Misc/warning-flags.c b/test/Misc/warning-flags.c
index 98130c5..cdfb38f 100644
--- a/test/Misc/warning-flags.c
+++ b/test/Misc/warning-flags.c
@@ -17,7 +17,7 @@
 
 The list of warnings below should NEVER grow.  It should gradually shrink to 0.
 
-CHECK: Warnings without flags (242):
+CHECK: Warnings without flags (241):
 CHECK-NEXT:   ext_anonymous_struct_union_qualified
 CHECK-NEXT:   ext_binary_literal
 CHECK-NEXT:   ext_cast_fn_obj
@@ -26,7 +26,6 @@
 CHECK-NEXT:   ext_duplicate_declspec
 CHECK-NEXT:   ext_ellipsis_exception_spec
 CHECK-NEXT:   ext_empty_fnmacro_arg
-CHECK-NEXT:   ext_empty_source_file
 CHECK-NEXT:   ext_enum_friend
 CHECK-NEXT:   ext_enum_value_not_int
 CHECK-NEXT:   ext_enumerator_list_comma
diff --git a/test/PCH/empty-with-headers.c b/test/PCH/empty-with-headers.c
new file mode 100644
index 0000000..751be1c
--- /dev/null
+++ b/test/PCH/empty-with-headers.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c99 -pedantic-errors %s
+// RUN: %clang_cc1 -fsyntax-only -std=c99 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fsyntax-only -std=c99 -pedantic-errors -include-pch %t %s
+
+// RUN: %clang_cc1 -fsyntax-only -std=c99 -pedantic-errors -DINCLUDED %s -verify
+// This last one should warn for -Wempty-translation-unit (C99 6.9p1).
+
+#if defined(INCLUDED)
+
+// empty except for the prefix header
+
+#elif defined(HEADER)
+
+typedef int my_int;
+#define INCLUDED
+
+#else
+
+#define HEADER
+#include "empty-with-headers.c"
+// empty except for the header
+
+#endif
+
+// This should only fire if the header is not included,
+// either explicitly or as a prefix header.
+// expected-error{{ISO C requires a translation unit to contain at least one declaration.}}
diff --git a/test/Parser/completely-empty-header-file.h b/test/Parser/completely-empty-header-file.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/Parser/completely-empty-header-file.h
diff --git a/test/Parser/empty-translation-unit.c b/test/Parser/empty-translation-unit.c
new file mode 100644
index 0000000..0dbf37e
--- /dev/null
+++ b/test/Parser/empty-translation-unit.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c99 -pedantic -W -verify %s
+// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++03 -pedantic-errors -W %s
+
+#include "completely-empty-header-file.h"
+// no-warning -- an empty file is OK
+
+#define A_MACRO_IS_NOT_GOOD_ENOUGH 1
+
+// In C we should get this warning, but in C++ we shouldn't.
+// expected-warning{{ISO C requires a translation unit to contain at least one declaration.}}
diff --git a/test/Parser/opencl-pragma.cl b/test/Parser/opencl-pragma.cl
index 1946077..4c48b2a 100644
--- a/test/Parser/opencl-pragma.cl
+++ b/test/Parser/opencl-pragma.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only
+// RUN: %clang_cc1 %s -verify -pedantic -Wno-empty-translation-unit -fsyntax-only
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
diff --git a/test/Preprocessor/undef-error.c b/test/Preprocessor/undef-error.c
index ad611de..959c163 100644
--- a/test/Preprocessor/undef-error.c
+++ b/test/Preprocessor/undef-error.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -pedantic-errors -verify 
+// RUN: %clang_cc1 %s -pedantic-errors -Wno-empty-translation-unit -verify 
 // PR2045
 
 #define b
diff --git a/test/Sema/c89-2.c b/test/Sema/c89-2.c
index f6f6bd9..14b955a 100644
--- a/test/Sema/c89-2.c
+++ b/test/Sema/c89-2.c
@@ -1,4 +1,4 @@
-/* RUN: %clang_cc1 %s -std=c89 -pedantic-errors -verify
+/* RUN: %clang_cc1 %s -std=c89 -pedantic-errors -Wno-empty-translation-unit -verify
  */
 
 #if 1LL        /* expected-error {{long long}} */