blob: cbdc702815d1428cec897895d7d535ce83306d40 [file] [log] [blame]
David Blaikie8c0b3782012-06-06 18:52:13 +00001//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites include invocations into their expansions. This gives you
11// a file with all included files merged into it.
12//
13//===----------------------------------------------------------------------===//
14
Ted Kremenek305c6132012-09-01 05:09:24 +000015#include "clang/Rewrite/Frontend/Rewriters.h"
David Blaikie8c0b3782012-06-06 18:52:13 +000016#include "clang/Basic/SourceManager.h"
17#include "clang/Frontend/PreprocessorOutputOptions.h"
Chandler Carruth55fc8732012-12-04 09:13:33 +000018#include "clang/Lex/Preprocessor.h"
David Blaikie8c0b3782012-06-06 18:52:13 +000019#include "llvm/Support/raw_ostream.h"
20
21using namespace clang;
22using namespace llvm;
23
24namespace {
25
26class InclusionRewriter : public PPCallbacks {
27 /// Information about which #includes were actually performed,
28 /// created by preprocessor callbacks.
29 struct FileChange {
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +000030 const Module *Mod;
David Blaikie8c0b3782012-06-06 18:52:13 +000031 SourceLocation From;
32 FileID Id;
33 SrcMgr::CharacteristicKind FileType;
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +000034 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
David Blaikie8c0b3782012-06-06 18:52:13 +000035 }
36 };
Dmitri Gribenko49fdccb2012-06-08 23:13:42 +000037 Preprocessor &PP; ///< Used to find inclusion directives.
38 SourceManager &SM; ///< Used to read and manage source files.
39 raw_ostream &OS; ///< The destination stream for rewritten contents.
40 bool ShowLineMarkers; ///< Show #line markers.
41 bool UseLineDirective; ///< Use of line directives or line markers.
David Blaikie8c0b3782012-06-06 18:52:13 +000042 typedef std::map<unsigned, FileChange> FileChangeMap;
Dmitri Gribenko959dc842013-02-16 22:21:38 +000043 FileChangeMap FileChanges; ///< Tracks which files were included where.
David Blaikie8c0b3782012-06-06 18:52:13 +000044 /// Used transitively for building up the FileChanges mapping over the
45 /// various \c PPCallbacks callbacks.
46 FileChangeMap::iterator LastInsertedFileChange;
47public:
48 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
49 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
50private:
51 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
52 SrcMgr::CharacteristicKind FileType,
53 FileID PrevFID);
54 virtual void FileSkipped(const FileEntry &ParentFile,
55 const Token &FilenameTok,
56 SrcMgr::CharacteristicKind FileType);
57 virtual void InclusionDirective(SourceLocation HashLoc,
58 const Token &IncludeTok,
59 StringRef FileName,
60 bool IsAngled,
Argyrios Kyrtzidisda313592012-09-27 01:42:07 +000061 CharSourceRange FilenameRange,
David Blaikie8c0b3782012-06-06 18:52:13 +000062 const FileEntry *File,
David Blaikie8c0b3782012-06-06 18:52:13 +000063 StringRef SearchPath,
Argyrios Kyrtzidisf8afcff2012-09-29 01:06:10 +000064 StringRef RelativePath,
65 const Module *Imported);
David Blaikie8c0b3782012-06-06 18:52:13 +000066 void WriteLineInfo(const char *Filename, int Line,
67 SrcMgr::CharacteristicKind FileType,
68 StringRef EOL, StringRef Extra = StringRef());
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +000069 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
David Blaikie8c0b3782012-06-06 18:52:13 +000070 void OutputContentUpTo(const MemoryBuffer &FromFile,
71 unsigned &WriteFrom, unsigned WriteTo,
72 StringRef EOL, int &lines,
73 bool EnsureNewline = false);
74 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
75 const MemoryBuffer &FromFile, StringRef EOL,
76 unsigned &NextToWrite, int &Lines);
77 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
78 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
79};
80
81} // end anonymous namespace
82
83/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
84InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
85 bool ShowLineMarkers)
86 : PP(PP), SM(PP.getSourceManager()), OS(OS),
87 ShowLineMarkers(ShowLineMarkers),
88 LastInsertedFileChange(FileChanges.end()) {
89 // If we're in microsoft mode, use normal #line instead of line markers.
90 UseLineDirective = PP.getLangOpts().MicrosoftExt;
91}
92
93/// Write appropriate line information as either #line directives or GNU line
94/// markers depending on what mode we're in, including the \p Filename and
95/// \p Line we are located at, using the specified \p EOL line separator, and
96/// any \p Extra context specifiers in GNU line directives.
97void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
98 SrcMgr::CharacteristicKind FileType,
99 StringRef EOL, StringRef Extra) {
100 if (!ShowLineMarkers)
101 return;
102 if (UseLineDirective) {
103 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
104 } else {
105 // Use GNU linemarkers as described here:
106 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
107 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
108 if (!Extra.empty())
109 OS << Extra;
110 if (FileType == SrcMgr::C_System)
111 // "`3' This indicates that the following text comes from a system header
112 // file, so certain warnings should be suppressed."
113 OS << " 3";
114 else if (FileType == SrcMgr::C_ExternCSystem)
115 // as above for `3', plus "`4' This indicates that the following text
116 // should be treated as being wrapped in an implicit extern "C" block."
117 OS << " 3 4";
118 }
119 OS << EOL;
120}
121
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000122void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
123 StringRef EOL) {
124 OS << "@import " << Mod->getFullModuleName() << ";"
125 << " /* clang -frewrite-includes: implicit import */" << EOL;
126}
127
David Blaikie8c0b3782012-06-06 18:52:13 +0000128/// FileChanged - Whenever the preprocessor enters or exits a #include file
129/// it invokes this handler.
130void InclusionRewriter::FileChanged(SourceLocation Loc,
131 FileChangeReason Reason,
132 SrcMgr::CharacteristicKind NewFileType,
133 FileID) {
134 if (Reason != EnterFile)
135 return;
136 if (LastInsertedFileChange == FileChanges.end())
137 // we didn't reach this file (eg: the main file) via an inclusion directive
138 return;
139 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
140 LastInsertedFileChange->second.FileType = NewFileType;
141 LastInsertedFileChange = FileChanges.end();
142}
143
144/// Called whenever an inclusion is skipped due to canonical header protection
145/// macros.
146void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
147 const Token &/*FilenameTok*/,
148 SrcMgr::CharacteristicKind /*FileType*/) {
149 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
150 "found via an inclusion directive, was skipped");
151 FileChanges.erase(LastInsertedFileChange);
152 LastInsertedFileChange = FileChanges.end();
153}
154
155/// This should be called whenever the preprocessor encounters include
156/// directives. It does not say whether the file has been included, but it
157/// provides more information about the directive (hash location instead
158/// of location inside the included file). It is assumed that the matching
159/// FileChanged() or FileSkipped() is called after this.
160void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
161 const Token &/*IncludeTok*/,
162 StringRef /*FileName*/,
163 bool /*IsAngled*/,
Argyrios Kyrtzidisda313592012-09-27 01:42:07 +0000164 CharSourceRange /*FilenameRange*/,
David Blaikie8c0b3782012-06-06 18:52:13 +0000165 const FileEntry * /*File*/,
David Blaikie8c0b3782012-06-06 18:52:13 +0000166 StringRef /*SearchPath*/,
Argyrios Kyrtzidisf8afcff2012-09-29 01:06:10 +0000167 StringRef /*RelativePath*/,
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000168 const Module *Imported) {
David Blaikie8c0b3782012-06-06 18:52:13 +0000169 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
170 "directive was found before the previous one was processed");
171 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000172 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
David Blaikie8c0b3782012-06-06 18:52:13 +0000173 assert(p.second && "Unexpected revisitation of the same include directive");
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000174 if (!Imported)
175 LastInsertedFileChange = p.first;
David Blaikie8c0b3782012-06-06 18:52:13 +0000176}
177
178/// Simple lookup for a SourceLocation (specifically one denoting the hash in
179/// an inclusion directive) in the map of inclusion information, FileChanges.
180const InclusionRewriter::FileChange *
181InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
182 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
183 if (I != FileChanges.end())
184 return &I->second;
185 return NULL;
186}
187
David Blaikie8c0b3782012-06-06 18:52:13 +0000188/// Detect the likely line ending style of \p FromFile by examining the first
189/// newline found within it.
190static StringRef DetectEOL(const MemoryBuffer &FromFile) {
191 // detect what line endings the file uses, so that added content does not mix
192 // the style
193 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
194 if (Pos == NULL)
195 return "\n";
196 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
197 return "\n\r";
198 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
199 return "\r\n";
200 return "\n";
201}
202
203/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
204/// \p WriteTo - 1.
205void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
206 unsigned &WriteFrom, unsigned WriteTo,
207 StringRef EOL, int &Line,
208 bool EnsureNewline) {
209 if (WriteTo <= WriteFrom)
210 return;
211 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
212 // count lines manually, it's faster than getPresumedLoc()
Benjamin Kramer31598192012-06-09 13:18:14 +0000213 Line += std::count(FromFile.getBufferStart() + WriteFrom,
214 FromFile.getBufferStart() + WriteTo, '\n');
David Blaikie8c0b3782012-06-06 18:52:13 +0000215 if (EnsureNewline) {
216 char LastChar = FromFile.getBufferStart()[WriteTo - 1];
217 if (LastChar != '\n' && LastChar != '\r')
218 OS << EOL;
219 }
220 WriteFrom = WriteTo;
221}
222
223/// Print characters from \p FromFile starting at \p NextToWrite up until the
224/// inclusion directive at \p StartToken, then print out the inclusion
225/// inclusion directive disabled by a #if directive, updating \p NextToWrite
226/// and \p Line to track the number of source lines visited and the progress
227/// through the \p FromFile buffer.
228void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
229 const Token &StartToken,
230 const MemoryBuffer &FromFile,
231 StringRef EOL,
232 unsigned &NextToWrite, int &Line) {
233 OutputContentUpTo(FromFile, NextToWrite,
234 SM.getFileOffset(StartToken.getLocation()), EOL, Line);
235 Token DirectiveToken;
236 do {
237 DirectiveLex.LexFromRawLexer(DirectiveToken);
238 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
David Blaikie60ad16b2012-06-14 17:36:01 +0000239 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
David Blaikie8c0b3782012-06-06 18:52:13 +0000240 OutputContentUpTo(FromFile, NextToWrite,
241 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
242 EOL, Line);
David Blaikie60ad16b2012-06-14 17:36:01 +0000243 OS << "#endif /* expanded by -frewrite-includes */" << EOL;
David Blaikie8c0b3782012-06-06 18:52:13 +0000244}
245
246/// Find the next identifier in the pragma directive specified by \p RawToken.
247StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
248 Token &RawToken) {
249 RawLex.LexFromRawLexer(RawToken);
250 if (RawToken.is(tok::raw_identifier))
251 PP.LookUpIdentifierInfo(RawToken);
252 if (RawToken.is(tok::identifier))
253 return RawToken.getIdentifierInfo()->getName();
254 return StringRef();
255}
256
257/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
258/// and including content of included files recursively.
259bool InclusionRewriter::Process(FileID FileId,
260 SrcMgr::CharacteristicKind FileType)
261{
262 bool Invalid;
263 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
David Blaikiebae2b312012-06-14 17:36:05 +0000264 if (Invalid) // invalid inclusion
Argyrios Kyrtzidis507d4962013-04-10 01:53:37 +0000265 return false;
David Blaikie8c0b3782012-06-06 18:52:13 +0000266 const char *FileName = FromFile.getBufferIdentifier();
267 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
268 RawLex.SetCommentRetentionState(false);
269
270 StringRef EOL = DetectEOL(FromFile);
271
272 // Per the GNU docs: "1" indicates the start of a new file.
273 WriteLineInfo(FileName, 1, FileType, EOL, " 1");
274
275 if (SM.getFileIDSize(FileId) == 0)
Argyrios Kyrtzidis507d4962013-04-10 01:53:37 +0000276 return false;
David Blaikie8c0b3782012-06-06 18:52:13 +0000277
278 // The next byte to be copied from the source file
279 unsigned NextToWrite = 0;
280 int Line = 1; // The current input file line number.
281
282 Token RawToken;
283 RawLex.LexFromRawLexer(RawToken);
284
285 // TODO: Consider adding a switch that strips possibly unimportant content,
286 // such as comments, to reduce the size of repro files.
287 while (RawToken.isNot(tok::eof)) {
288 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
289 RawLex.setParsingPreprocessorDirective(true);
290 Token HashToken = RawToken;
291 RawLex.LexFromRawLexer(RawToken);
292 if (RawToken.is(tok::raw_identifier))
293 PP.LookUpIdentifierInfo(RawToken);
294 if (RawToken.is(tok::identifier)) {
295 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
296 case tok::pp_include:
297 case tok::pp_include_next:
298 case tok::pp_import: {
299 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
300 Line);
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000301 StringRef LineInfoExtra;
David Blaikie8c0b3782012-06-06 18:52:13 +0000302 if (const FileChange *Change = FindFileChangeLocation(
303 HashToken.getLocation())) {
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000304 if (Change->Mod) {
305 WriteImplicitModuleImport(Change->Mod, EOL);
306
307 // else now include and recursively process the file
308 } else if (Process(Change->Id, Change->FileType)) {
David Blaikie8c0b3782012-06-06 18:52:13 +0000309 // and set lineinfo back to this file, if the nested one was
310 // actually included
311 // `2' indicates returning to a file (after having included
312 // another file.
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000313 LineInfoExtra = " 2";
Argyrios Kyrtzidis507d4962013-04-10 01:53:37 +0000314 }
Argyrios Kyrtzidis03409962013-04-10 01:53:50 +0000315 }
316 // fix up lineinfo (since commented out directive changed line
317 // numbers) for inclusions that were skipped due to header guards
318 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
David Blaikie8c0b3782012-06-06 18:52:13 +0000319 break;
320 }
321 case tok::pp_pragma: {
322 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
323 if (Identifier == "clang" || Identifier == "GCC") {
324 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
325 // keep the directive in, commented out
326 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
327 NextToWrite, Line);
328 // update our own type
329 FileType = SM.getFileCharacteristic(RawToken.getLocation());
330 WriteLineInfo(FileName, Line, FileType, EOL);
331 }
332 } else if (Identifier == "once") {
333 // keep the directive in, commented out
334 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
335 NextToWrite, Line);
336 WriteLineInfo(FileName, Line, FileType, EOL);
337 }
338 break;
339 }
340 default:
341 break;
342 }
343 }
344 RawLex.setParsingPreprocessorDirective(false);
345 }
346 RawLex.LexFromRawLexer(RawToken);
347 }
348 OutputContentUpTo(FromFile, NextToWrite,
349 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
350 /*EnsureNewline*/true);
351 return true;
352}
353
David Blaikie60ad16b2012-06-14 17:36:01 +0000354/// InclusionRewriterInInput - Implement -frewrite-includes mode.
David Blaikie8c0b3782012-06-06 18:52:13 +0000355void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
356 const PreprocessorOutputOptions &Opts) {
357 SourceManager &SM = PP.getSourceManager();
358 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
359 Opts.ShowLineMarkers);
360 PP.addPPCallbacks(Rewrite);
361
362 // First let the preprocessor process the entire file and call callbacks.
363 // Callbacks will record which #include's were actually performed.
364 PP.EnterMainSourceFile();
365 Token Tok;
366 // Only preprocessor directives matter here, so disable macro expansion
367 // everywhere else as an optimization.
368 // TODO: It would be even faster if the preprocessor could be switched
369 // to a mode where it would parse only preprocessor directives and comments,
370 // nothing else matters for parsing or processing.
371 PP.SetMacroExpansionOnlyInDirectives();
372 do {
373 PP.Lex(Tok);
374 } while (Tok.isNot(tok::eof));
375 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
376 OS->flush();
377}