Introduce a new lexer function to compute the "preamble" of a file,
which is the part of the file that contains all of the initial
comments, includes, and preprocessor directives that occur before any
of the actual code. Added a new -print-preamble cc1 action that is
only used for testing.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108913 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td
index 4502401..72dca18 100644
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -304,6 +304,9 @@
HelpText<"Apply fix-it advice creating a file with the given suffix">;
def parse_print_callbacks : Flag<"-parse-print-callbacks">,
HelpText<"Run parser and print each callback invoked">;
+def print_preamble : Flag<"-print-preamble">, // cc1 action; only used for testing.
+ HelpText<"Print the \"preamble\" of a file, which is a candidate for implicit"
+ " precompiled headers.">;
def emit_html : Flag<"-emit-html">,
HelpText<"Output input source as HTML">;
def ast_print : Flag<"-ast-print">,
diff --git a/include/clang/Frontend/FrontendActions.h b/include/clang/Frontend/FrontendActions.h
index 26262cf..c172066 100644
--- a/include/clang/Frontend/FrontendActions.h
+++ b/include/clang/Frontend/FrontendActions.h
@@ -134,6 +134,16 @@
virtual bool hasCodeCompletionSupport() const;
};
+class PrintPreambleAction : public FrontendAction { // Action run for -print-preamble.
+protected:
+ void ExecuteAction(); // Prints the preamble of the current input file.
+ virtual ASTConsumer *CreateASTConsumer(CompilerInstance &, llvm::StringRef) {
+ return 0; // This action never creates an AST consumer.
+ }
+
+ virtual bool usesPreprocessorOnly() const { return true; }
+};
+
//===----------------------------------------------------------------------===//
// Preprocessor Actions
//===----------------------------------------------------------------------===//
@@ -174,7 +184,7 @@
virtual bool hasPCHSupport() const { return true; }
};
-
+
} // end namespace clang
#endif
diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h
index 4010ea6..263e571 100644
--- a/include/clang/Frontend/FrontendOptions.h
+++ b/include/clang/Frontend/FrontendOptions.h
@@ -44,6 +44,7 @@
ParseSyntaxOnly, ///< Parse and perform semantic analysis.
PluginAction, ///< Run a plugin action, \see ActionName.
PrintDeclContext, ///< Print DeclContext and their Decls.
+ PrintPreamble, ///< Print the "preamble" of the input file
PrintPreprocessedInput, ///< -E mode.
RewriteMacros, ///< Expand macros but not #includes.
RewriteObjC, ///< ObjC->C Rewriter.
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 6a6e319..96d7605 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -219,6 +219,19 @@
const SourceManager &SM,
const LangOptions &LangOpts);
+ /// \brief Compute the preamble of the given file.
+ ///
+ /// The preamble of a file contains the initial comments, include directives,
+ /// and other preprocessor directives that occur before the code in this
+ /// particular file actually begins. The preamble of the main source file is
+ /// a potential prefix header.
+ ///
+ /// \param Buffer The memory buffer containing the file's contents.
+ ///
+ /// \returns The offset, in bytes from the start of \p Buffer, at which the
+ /// preamble ends and the rest of the file begins.
+ static unsigned ComputePreamble(const llvm::MemoryBuffer *Buffer);
+
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
private:
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 00363d9..b007ac4 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -331,6 +331,7 @@
case frontend::ParsePrintCallbacks: return "-parse-print-callbacks";
case frontend::ParseSyntaxOnly: return "-fsyntax-only";
case frontend::PrintDeclContext: return "-print-decl-contexts";
+ case frontend::PrintPreamble: return "-print-preamble";
case frontend::PrintPreprocessedInput: return "-E";
case frontend::RewriteMacros: return "-rewrite-macros";
case frontend::RewriteObjC: return "-rewrite-objc";
@@ -989,6 +990,8 @@
Opts.ProgramAction = frontend::ParseSyntaxOnly; break;
case OPT_print_decl_contexts:
Opts.ProgramAction = frontend::PrintDeclContext; break;
+ case OPT_print_preamble:
+ Opts.ProgramAction = frontend::PrintPreamble; break;
case OPT_E:
Opts.ProgramAction = frontend::PrintPreprocessedInput; break;
case OPT_rewrite_macros:
diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp
index 3a53dee..4db9c11 100644
--- a/lib/Frontend/FrontendActions.cpp
+++ b/lib/Frontend/FrontendActions.cpp
@@ -19,6 +19,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
@@ -192,3 +193,32 @@
DoPrintPreprocessedInput(CI.getPreprocessor(), OS,
CI.getPreprocessorOutputOpts());
}
+
+void PrintPreambleAction::ExecuteAction() { // Writes the input's preamble to llvm::outs().
+ switch (getCurrentFileKind()) {
+ case IK_C:
+ case IK_CXX:
+ case IK_ObjC:
+ case IK_ObjCXX:
+ case IK_OpenCL:
+ break; // These input kinds are processed below.
+
+ case IK_None:
+ case IK_Asm:
+ case IK_PreprocessedC:
+ case IK_PreprocessedCXX:
+ case IK_PreprocessedObjC:
+ case IK_PreprocessedObjCXX:
+ case IK_AST:
+ case IK_LLVM_IR:
+ // We can't do anything with these; return without printing anything.
+ return;
+ }
+
+ llvm::MemoryBuffer *Buffer = llvm::MemoryBuffer::getFile(getCurrentFile());
+ if (Buffer) { // A null buffer is skipped silently; nothing is printed.
+ unsigned Preamble = Lexer::ComputePreamble(Buffer);
+ llvm::outs().write(Buffer->getBufferStart(), Preamble); // First Preamble bytes only.
+ delete Buffer; // Release the buffer obtained from getFile() above.
+ }
+}
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 91b14f6..2f11c37 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -28,6 +28,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
@@ -247,6 +248,130 @@
return TheTok.getLength();
}
+namespace { // File-local classification of directives for Lexer::ComputePreamble.
+ enum PreambleDirectiveKind {
+ PDK_Skipped, // Directive is accepted in the preamble and skipped over.
+ PDK_StartIf, // #if/#ifdef/#ifndef: opens a conditional block.
+ PDK_EndIf, // #endif: closes a conditional block.
+ PDK_Unknown // Unrecognized directive: the preamble ends at its '#'.
+ };
+}
+
+unsigned Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer) {
+ // Create a lexer starting at the beginning of the file. Note that we use a
+ // "fake" file source location at offset 1 so that the lexer will track our
+ // position within the file.
+ const unsigned StartOffset = 1;
+ SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
+ LangOptions LangOpts; // Default-constructed; only Buffer is known here.
+ Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
+ Buffer->getBufferStart(), Buffer->getBufferEnd());
+
+ bool InPreprocessorDirective = false;
+ Token TheTok;
+ Token IfStartTok; // '#' that opened the outermost conditional block.
+ unsigned IfCount = 0; // Open #if depth; nonzero at exit => end at IfStartTok.
+ do {
+ TheLexer.LexFromRawLexer(TheTok);
+
+ if (InPreprocessorDirective) {
+ // If we've hit the end of the file, we're done.
+ if (TheTok.getKind() == tok::eof) {
+ InPreprocessorDirective = false;
+ break;
+ }
+
+ // If we haven't hit the end of the preprocessor directive, skip this
+ // token.
+ if (!TheTok.isAtStartOfLine())
+ continue;
+
+ // We've passed the end of the preprocessor directive, and will look
+ // at this token again below.
+ InPreprocessorDirective = false;
+ }
+
+ // Comments are okay; skip over them.
+ if (TheTok.getKind() == tok::comment)
+ continue;
+
+ if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
+ // This is the start of a preprocessor directive.
+ Token HashTok = TheTok;
+ InPreprocessorDirective = true;
+
+ // Figure out which directive this is. Since we're lexing raw tokens,
+ // we don't have an identifier table available. Instead, just look at
+ // the raw identifier to recognize and categorize preprocessor directives.
+ TheLexer.LexFromRawLexer(TheTok);
+ if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
+ const char *IdStart = Buffer->getBufferStart()
+ + TheTok.getLocation().getRawEncoding() - 1; // Undo StartOffset (1).
+ llvm::StringRef Keyword(IdStart, TheTok.getLength());
+ PreambleDirectiveKind PDK
+ = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
+ .Case("include", PDK_Skipped)
+ .Case("__include_macros", PDK_Skipped)
+ .Case("define", PDK_Skipped)
+ .Case("undef", PDK_Skipped)
+ .Case("line", PDK_Skipped)
+ .Case("error", PDK_Skipped)
+ .Case("pragma", PDK_Skipped)
+ .Case("import", PDK_Skipped)
+ .Case("include_next", PDK_Skipped)
+ .Case("warning", PDK_Skipped)
+ .Case("ident", PDK_Skipped)
+ .Case("sccs", PDK_Skipped)
+ .Case("assert", PDK_Skipped)
+ .Case("unassert", PDK_Skipped)
+ .Case("if", PDK_StartIf)
+ .Case("ifdef", PDK_StartIf)
+ .Case("ifndef", PDK_StartIf)
+ .Case("elif", PDK_Skipped)
+ .Case("else", PDK_Skipped)
+ .Case("endif", PDK_EndIf)
+ .Default(PDK_Unknown);
+
+ switch (PDK) {
+ case PDK_Skipped:
+ continue; // Continues the do/while loop, not just the switch.
+
+ case PDK_StartIf:
+ if (IfCount == 0)
+ IfStartTok = HashTok; // Remember where the outermost block starts.
+
+ ++IfCount;
+ continue;
+
+ case PDK_EndIf:
+ // Mismatched #endif. The preamble ends here.
+ if (IfCount == 0)
+ break; // Leaves the switch only; falls through to the rollback below.
+
+ --IfCount;
+ continue;
+
+ case PDK_Unknown:
+ // We don't know what this directive is; stop at the '#'.
+ break;
+ }
+ }
+
+ // We only end up here if we didn't recognize the preprocessor
+ // directive or it was one that can't occur in the preamble at this
+ // point. Roll back the current token to the location of the '#'.
+ InPreprocessorDirective = false;
+ TheTok = HashTok;
+ }
+
+ // We hit a token that ends the preamble (code, EOF, or a rejected '#').
+ break;
+ } while (true);
+
+ SourceLocation End = IfCount? IfStartTok.getLocation() : TheTok.getLocation();
+ return End.getRawEncoding() - StartLoc.getRawEncoding();
+}
+
//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//
diff --git a/test/Lexer/Inputs/preamble.txt b/test/Lexer/Inputs/preamble.txt
new file mode 100644
index 0000000..c5f7288
--- /dev/null
+++ b/test/Lexer/Inputs/preamble.txt
@@ -0,0 +1,11 @@
+// Preamble detection test: see below for comments and test commands.
+
+#include <blah>
+#ifndef FOO
+#else
+#ifdef BAR
+#elif WIBBLE
+#endif
+#pragma unknown
+#endif
+
diff --git a/test/Lexer/preamble.c b/test/Lexer/preamble.c
new file mode 100644
index 0000000..b1f2fad
--- /dev/null
+++ b/test/Lexer/preamble.c
@@ -0,0 +1,25 @@
+// Preamble detection test: see below for comments and test commands.
+
+#include <blah>
+#ifndef FOO
+#else
+#ifdef BAR
+#elif WIBBLE
+#endif
+#pragma unknown
+#endif
+
+#ifdef WIBBLE
+#include "honk"
+#else
+int foo();
+#endif
+
+// This test checks for detection of the preamble of a file, which
+// includes all of the starting comments and #includes. Note that any
+// changes to the preamble part of this file must be mirrored in
+// Inputs/preamble.txt, since we diff against it.
+
+// RUN: %clang_cc1 -print-preamble %s > %t
+// RUN: diff %t %S/Inputs/preamble.txt
+
diff --git a/tools/driver/cc1_main.cpp b/tools/driver/cc1_main.cpp
index 841e40a..3b13f0c 100644
--- a/tools/driver/cc1_main.cpp
+++ b/tools/driver/cc1_main.cpp
@@ -102,6 +102,7 @@
}
case PrintDeclContext: return new DeclContextPrintAction();
+ case PrintPreamble: return new PrintPreambleAction();
case PrintPreprocessedInput: return new PrintPreprocessedAction();
case RewriteMacros: return new RewriteMacrosAction();
case RewriteObjC: return new RewriteObjCAction();