blob: cecc8672c5c2097e4c12e736c06c59b81486a181 [file] [log] [blame]
//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code rewrites include invocations into their expansions. This gives you
// a file with all included files merged into it.
//
//===----------------------------------------------------------------------===//
14
Ted Kremenek305c6132012-09-01 05:09:24 +000015#include "clang/Rewrite/Frontend/Rewriters.h"
David Blaikie8c0b3782012-06-06 18:52:13 +000016#include "clang/Lex/Preprocessor.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Frontend/PreprocessorOutputOptions.h"
19#include "llvm/Support/raw_ostream.h"
20
21using namespace clang;
22using namespace llvm;
23
24namespace {
25
26class InclusionRewriter : public PPCallbacks {
27 /// Information about which #includes were actually performed,
28 /// created by preprocessor callbacks.
29 struct FileChange {
30 SourceLocation From;
31 FileID Id;
32 SrcMgr::CharacteristicKind FileType;
33 FileChange(SourceLocation From) : From(From) {
34 }
35 };
Dmitri Gribenko49fdccb2012-06-08 23:13:42 +000036 Preprocessor &PP; ///< Used to find inclusion directives.
37 SourceManager &SM; ///< Used to read and manage source files.
38 raw_ostream &OS; ///< The destination stream for rewritten contents.
39 bool ShowLineMarkers; ///< Show #line markers.
40 bool UseLineDirective; ///< Use of line directives or line markers.
David Blaikie8c0b3782012-06-06 18:52:13 +000041 typedef std::map<unsigned, FileChange> FileChangeMap;
42 FileChangeMap FileChanges; /// Tracks which files were included where.
43 /// Used transitively for building up the FileChanges mapping over the
44 /// various \c PPCallbacks callbacks.
45 FileChangeMap::iterator LastInsertedFileChange;
46public:
47 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
48 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
49private:
50 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
51 SrcMgr::CharacteristicKind FileType,
52 FileID PrevFID);
53 virtual void FileSkipped(const FileEntry &ParentFile,
54 const Token &FilenameTok,
55 SrcMgr::CharacteristicKind FileType);
56 virtual void InclusionDirective(SourceLocation HashLoc,
57 const Token &IncludeTok,
58 StringRef FileName,
59 bool IsAngled,
Argyrios Kyrtzidisda313592012-09-27 01:42:07 +000060 CharSourceRange FilenameRange,
David Blaikie8c0b3782012-06-06 18:52:13 +000061 const FileEntry *File,
David Blaikie8c0b3782012-06-06 18:52:13 +000062 StringRef SearchPath,
Argyrios Kyrtzidisf8afcff2012-09-29 01:06:10 +000063 StringRef RelativePath,
64 const Module *Imported);
David Blaikie8c0b3782012-06-06 18:52:13 +000065 void WriteLineInfo(const char *Filename, int Line,
66 SrcMgr::CharacteristicKind FileType,
67 StringRef EOL, StringRef Extra = StringRef());
68 void OutputContentUpTo(const MemoryBuffer &FromFile,
69 unsigned &WriteFrom, unsigned WriteTo,
70 StringRef EOL, int &lines,
71 bool EnsureNewline = false);
72 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
73 const MemoryBuffer &FromFile, StringRef EOL,
74 unsigned &NextToWrite, int &Lines);
75 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
76 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
77};
78
79} // end anonymous namespace
80
81/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
82InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
83 bool ShowLineMarkers)
84 : PP(PP), SM(PP.getSourceManager()), OS(OS),
85 ShowLineMarkers(ShowLineMarkers),
86 LastInsertedFileChange(FileChanges.end()) {
87 // If we're in microsoft mode, use normal #line instead of line markers.
88 UseLineDirective = PP.getLangOpts().MicrosoftExt;
89}
90
91/// Write appropriate line information as either #line directives or GNU line
92/// markers depending on what mode we're in, including the \p Filename and
93/// \p Line we are located at, using the specified \p EOL line separator, and
94/// any \p Extra context specifiers in GNU line directives.
95void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
96 SrcMgr::CharacteristicKind FileType,
97 StringRef EOL, StringRef Extra) {
98 if (!ShowLineMarkers)
99 return;
100 if (UseLineDirective) {
101 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
102 } else {
103 // Use GNU linemarkers as described here:
104 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
105 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
106 if (!Extra.empty())
107 OS << Extra;
108 if (FileType == SrcMgr::C_System)
109 // "`3' This indicates that the following text comes from a system header
110 // file, so certain warnings should be suppressed."
111 OS << " 3";
112 else if (FileType == SrcMgr::C_ExternCSystem)
113 // as above for `3', plus "`4' This indicates that the following text
114 // should be treated as being wrapped in an implicit extern "C" block."
115 OS << " 3 4";
116 }
117 OS << EOL;
118}
119
120/// FileChanged - Whenever the preprocessor enters or exits a #include file
121/// it invokes this handler.
122void InclusionRewriter::FileChanged(SourceLocation Loc,
123 FileChangeReason Reason,
124 SrcMgr::CharacteristicKind NewFileType,
125 FileID) {
126 if (Reason != EnterFile)
127 return;
128 if (LastInsertedFileChange == FileChanges.end())
129 // we didn't reach this file (eg: the main file) via an inclusion directive
130 return;
131 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
132 LastInsertedFileChange->second.FileType = NewFileType;
133 LastInsertedFileChange = FileChanges.end();
134}
135
136/// Called whenever an inclusion is skipped due to canonical header protection
137/// macros.
138void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
139 const Token &/*FilenameTok*/,
140 SrcMgr::CharacteristicKind /*FileType*/) {
141 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
142 "found via an inclusion directive, was skipped");
143 FileChanges.erase(LastInsertedFileChange);
144 LastInsertedFileChange = FileChanges.end();
145}
146
147/// This should be called whenever the preprocessor encounters include
148/// directives. It does not say whether the file has been included, but it
149/// provides more information about the directive (hash location instead
150/// of location inside the included file). It is assumed that the matching
151/// FileChanged() or FileSkipped() is called after this.
152void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
153 const Token &/*IncludeTok*/,
154 StringRef /*FileName*/,
155 bool /*IsAngled*/,
Argyrios Kyrtzidisda313592012-09-27 01:42:07 +0000156 CharSourceRange /*FilenameRange*/,
David Blaikie8c0b3782012-06-06 18:52:13 +0000157 const FileEntry * /*File*/,
David Blaikie8c0b3782012-06-06 18:52:13 +0000158 StringRef /*SearchPath*/,
Argyrios Kyrtzidisf8afcff2012-09-29 01:06:10 +0000159 StringRef /*RelativePath*/,
160 const Module */*Imported*/) {
David Blaikie8c0b3782012-06-06 18:52:13 +0000161 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
162 "directive was found before the previous one was processed");
163 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
164 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
165 assert(p.second && "Unexpected revisitation of the same include directive");
166 LastInsertedFileChange = p.first;
167}
168
169/// Simple lookup for a SourceLocation (specifically one denoting the hash in
170/// an inclusion directive) in the map of inclusion information, FileChanges.
171const InclusionRewriter::FileChange *
172InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
173 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
174 if (I != FileChanges.end())
175 return &I->second;
176 return NULL;
177}
178
David Blaikie8c0b3782012-06-06 18:52:13 +0000179/// Detect the likely line ending style of \p FromFile by examining the first
180/// newline found within it.
181static StringRef DetectEOL(const MemoryBuffer &FromFile) {
182 // detect what line endings the file uses, so that added content does not mix
183 // the style
184 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
185 if (Pos == NULL)
186 return "\n";
187 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
188 return "\n\r";
189 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
190 return "\r\n";
191 return "\n";
192}
193
194/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
195/// \p WriteTo - 1.
196void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
197 unsigned &WriteFrom, unsigned WriteTo,
198 StringRef EOL, int &Line,
199 bool EnsureNewline) {
200 if (WriteTo <= WriteFrom)
201 return;
202 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
203 // count lines manually, it's faster than getPresumedLoc()
Benjamin Kramer31598192012-06-09 13:18:14 +0000204 Line += std::count(FromFile.getBufferStart() + WriteFrom,
205 FromFile.getBufferStart() + WriteTo, '\n');
David Blaikie8c0b3782012-06-06 18:52:13 +0000206 if (EnsureNewline) {
207 char LastChar = FromFile.getBufferStart()[WriteTo - 1];
208 if (LastChar != '\n' && LastChar != '\r')
209 OS << EOL;
210 }
211 WriteFrom = WriteTo;
212}
213
214/// Print characters from \p FromFile starting at \p NextToWrite up until the
215/// inclusion directive at \p StartToken, then print out the inclusion
216/// inclusion directive disabled by a #if directive, updating \p NextToWrite
217/// and \p Line to track the number of source lines visited and the progress
218/// through the \p FromFile buffer.
219void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
220 const Token &StartToken,
221 const MemoryBuffer &FromFile,
222 StringRef EOL,
223 unsigned &NextToWrite, int &Line) {
224 OutputContentUpTo(FromFile, NextToWrite,
225 SM.getFileOffset(StartToken.getLocation()), EOL, Line);
226 Token DirectiveToken;
227 do {
228 DirectiveLex.LexFromRawLexer(DirectiveToken);
229 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
David Blaikie60ad16b2012-06-14 17:36:01 +0000230 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
David Blaikie8c0b3782012-06-06 18:52:13 +0000231 OutputContentUpTo(FromFile, NextToWrite,
232 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
233 EOL, Line);
David Blaikie60ad16b2012-06-14 17:36:01 +0000234 OS << "#endif /* expanded by -frewrite-includes */" << EOL;
David Blaikie8c0b3782012-06-06 18:52:13 +0000235}
236
237/// Find the next identifier in the pragma directive specified by \p RawToken.
238StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
239 Token &RawToken) {
240 RawLex.LexFromRawLexer(RawToken);
241 if (RawToken.is(tok::raw_identifier))
242 PP.LookUpIdentifierInfo(RawToken);
243 if (RawToken.is(tok::identifier))
244 return RawToken.getIdentifierInfo()->getName();
245 return StringRef();
246}
247
248/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
249/// and including content of included files recursively.
250bool InclusionRewriter::Process(FileID FileId,
251 SrcMgr::CharacteristicKind FileType)
252{
253 bool Invalid;
254 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
David Blaikiebae2b312012-06-14 17:36:05 +0000255 if (Invalid) // invalid inclusion
256 return true;
David Blaikie8c0b3782012-06-06 18:52:13 +0000257 const char *FileName = FromFile.getBufferIdentifier();
258 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
259 RawLex.SetCommentRetentionState(false);
260
261 StringRef EOL = DetectEOL(FromFile);
262
263 // Per the GNU docs: "1" indicates the start of a new file.
264 WriteLineInfo(FileName, 1, FileType, EOL, " 1");
265
266 if (SM.getFileIDSize(FileId) == 0)
267 return true;
268
269 // The next byte to be copied from the source file
270 unsigned NextToWrite = 0;
271 int Line = 1; // The current input file line number.
272
273 Token RawToken;
274 RawLex.LexFromRawLexer(RawToken);
275
276 // TODO: Consider adding a switch that strips possibly unimportant content,
277 // such as comments, to reduce the size of repro files.
278 while (RawToken.isNot(tok::eof)) {
279 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
280 RawLex.setParsingPreprocessorDirective(true);
281 Token HashToken = RawToken;
282 RawLex.LexFromRawLexer(RawToken);
283 if (RawToken.is(tok::raw_identifier))
284 PP.LookUpIdentifierInfo(RawToken);
285 if (RawToken.is(tok::identifier)) {
286 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
287 case tok::pp_include:
288 case tok::pp_include_next:
289 case tok::pp_import: {
290 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
291 Line);
292 if (const FileChange *Change = FindFileChangeLocation(
293 HashToken.getLocation())) {
294 // now include and recursively process the file
295 if (Process(Change->Id, Change->FileType))
296 // and set lineinfo back to this file, if the nested one was
297 // actually included
298 // `2' indicates returning to a file (after having included
299 // another file.
300 WriteLineInfo(FileName, Line, FileType, EOL, " 2");
301 } else
302 // fix up lineinfo (since commented out directive changed line
303 // numbers) for inclusions that were skipped due to header guards
304 WriteLineInfo(FileName, Line, FileType, EOL);
305 break;
306 }
307 case tok::pp_pragma: {
308 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
309 if (Identifier == "clang" || Identifier == "GCC") {
310 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
311 // keep the directive in, commented out
312 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
313 NextToWrite, Line);
314 // update our own type
315 FileType = SM.getFileCharacteristic(RawToken.getLocation());
316 WriteLineInfo(FileName, Line, FileType, EOL);
317 }
318 } else if (Identifier == "once") {
319 // keep the directive in, commented out
320 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
321 NextToWrite, Line);
322 WriteLineInfo(FileName, Line, FileType, EOL);
323 }
324 break;
325 }
326 default:
327 break;
328 }
329 }
330 RawLex.setParsingPreprocessorDirective(false);
331 }
332 RawLex.LexFromRawLexer(RawToken);
333 }
334 OutputContentUpTo(FromFile, NextToWrite,
335 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
336 /*EnsureNewline*/true);
337 return true;
338}
339
David Blaikie60ad16b2012-06-14 17:36:01 +0000340/// InclusionRewriterInInput - Implement -frewrite-includes mode.
David Blaikie8c0b3782012-06-06 18:52:13 +0000341void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
342 const PreprocessorOutputOptions &Opts) {
343 SourceManager &SM = PP.getSourceManager();
344 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
345 Opts.ShowLineMarkers);
346 PP.addPPCallbacks(Rewrite);
347
348 // First let the preprocessor process the entire file and call callbacks.
349 // Callbacks will record which #include's were actually performed.
350 PP.EnterMainSourceFile();
351 Token Tok;
352 // Only preprocessor directives matter here, so disable macro expansion
353 // everywhere else as an optimization.
354 // TODO: It would be even faster if the preprocessor could be switched
355 // to a mode where it would parse only preprocessor directives and comments,
356 // nothing else matters for parsing or processing.
357 PP.SetMacroExpansionOnlyInDirectives();
358 do {
359 PP.Lex(Tok);
360 } while (Tok.isNot(tok::eof));
361 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
362 OS->flush();
363}