//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code rewrites include invocations into their expansions. This gives you
// a file with all included files merged into it.
//
//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Rewriters.h"
16#include "clang/Lex/Preprocessor.h"
17#include "clang/Basic/SourceManager.h"
18#include "clang/Frontend/PreprocessorOutputOptions.h"
19#include "llvm/Support/raw_ostream.h"
20
21using namespace clang;
22using namespace llvm;
23
24namespace {
25
26class InclusionRewriter : public PPCallbacks {
27 /// Information about which #includes were actually performed,
28 /// created by preprocessor callbacks.
29 struct FileChange {
30 SourceLocation From;
31 FileID Id;
32 SrcMgr::CharacteristicKind FileType;
33 FileChange(SourceLocation From) : From(From) {
34 }
35 };
36 Preprocessor &PP; //< Used to find inclusion directives.
37 SourceManager &SM; //< Used to read and manage source files.
38 raw_ostream &OS; //< The destination stream for rewritten contents.
39 bool ShowLineMarkers; //< Show #line markers.
40 bool UseLineDirective; //< Use of line directives or line markers.
41 typedef std::map<unsigned, FileChange> FileChangeMap;
42 FileChangeMap FileChanges; /// Tracks which files were included where.
43 /// Used transitively for building up the FileChanges mapping over the
44 /// various \c PPCallbacks callbacks.
45 FileChangeMap::iterator LastInsertedFileChange;
46public:
47 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
48 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
49private:
50 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
51 SrcMgr::CharacteristicKind FileType,
52 FileID PrevFID);
53 virtual void FileSkipped(const FileEntry &ParentFile,
54 const Token &FilenameTok,
55 SrcMgr::CharacteristicKind FileType);
56 virtual void InclusionDirective(SourceLocation HashLoc,
57 const Token &IncludeTok,
58 StringRef FileName,
59 bool IsAngled,
60 const FileEntry *File,
61 SourceLocation EndLoc,
62 StringRef SearchPath,
63 StringRef RelativePath);
64 void WriteLineInfo(const char *Filename, int Line,
65 SrcMgr::CharacteristicKind FileType,
66 StringRef EOL, StringRef Extra = StringRef());
67 void OutputContentUpTo(const MemoryBuffer &FromFile,
68 unsigned &WriteFrom, unsigned WriteTo,
69 StringRef EOL, int &lines,
70 bool EnsureNewline = false);
71 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
72 const MemoryBuffer &FromFile, StringRef EOL,
73 unsigned &NextToWrite, int &Lines);
74 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
75 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
76};
77
78} // end anonymous namespace
79
80/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
81InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
82 bool ShowLineMarkers)
83 : PP(PP), SM(PP.getSourceManager()), OS(OS),
84 ShowLineMarkers(ShowLineMarkers),
85 LastInsertedFileChange(FileChanges.end()) {
86 // If we're in microsoft mode, use normal #line instead of line markers.
87 UseLineDirective = PP.getLangOpts().MicrosoftExt;
88}
89
90/// Write appropriate line information as either #line directives or GNU line
91/// markers depending on what mode we're in, including the \p Filename and
92/// \p Line we are located at, using the specified \p EOL line separator, and
93/// any \p Extra context specifiers in GNU line directives.
94void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
95 SrcMgr::CharacteristicKind FileType,
96 StringRef EOL, StringRef Extra) {
97 if (!ShowLineMarkers)
98 return;
99 if (UseLineDirective) {
100 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
101 } else {
102 // Use GNU linemarkers as described here:
103 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
104 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
105 if (!Extra.empty())
106 OS << Extra;
107 if (FileType == SrcMgr::C_System)
108 // "`3' This indicates that the following text comes from a system header
109 // file, so certain warnings should be suppressed."
110 OS << " 3";
111 else if (FileType == SrcMgr::C_ExternCSystem)
112 // as above for `3', plus "`4' This indicates that the following text
113 // should be treated as being wrapped in an implicit extern "C" block."
114 OS << " 3 4";
115 }
116 OS << EOL;
117}
118
119/// FileChanged - Whenever the preprocessor enters or exits a #include file
120/// it invokes this handler.
121void InclusionRewriter::FileChanged(SourceLocation Loc,
122 FileChangeReason Reason,
123 SrcMgr::CharacteristicKind NewFileType,
124 FileID) {
125 if (Reason != EnterFile)
126 return;
127 if (LastInsertedFileChange == FileChanges.end())
128 // we didn't reach this file (eg: the main file) via an inclusion directive
129 return;
130 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
131 LastInsertedFileChange->second.FileType = NewFileType;
132 LastInsertedFileChange = FileChanges.end();
133}
134
135/// Called whenever an inclusion is skipped due to canonical header protection
136/// macros.
137void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
138 const Token &/*FilenameTok*/,
139 SrcMgr::CharacteristicKind /*FileType*/) {
140 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
141 "found via an inclusion directive, was skipped");
142 FileChanges.erase(LastInsertedFileChange);
143 LastInsertedFileChange = FileChanges.end();
144}
145
146/// This should be called whenever the preprocessor encounters include
147/// directives. It does not say whether the file has been included, but it
148/// provides more information about the directive (hash location instead
149/// of location inside the included file). It is assumed that the matching
150/// FileChanged() or FileSkipped() is called after this.
151void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
152 const Token &/*IncludeTok*/,
153 StringRef /*FileName*/,
154 bool /*IsAngled*/,
155 const FileEntry * /*File*/,
156 SourceLocation /*EndLoc*/,
157 StringRef /*SearchPath*/,
158 StringRef /*RelativePath*/) {
159 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
160 "directive was found before the previous one was processed");
161 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
162 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
163 assert(p.second && "Unexpected revisitation of the same include directive");
164 LastInsertedFileChange = p.first;
165}
166
167/// Simple lookup for a SourceLocation (specifically one denoting the hash in
168/// an inclusion directive) in the map of inclusion information, FileChanges.
169const InclusionRewriter::FileChange *
170InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
171 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
172 if (I != FileChanges.end())
173 return &I->second;
174 return NULL;
175}
176
/// Count the raw \\n characters in the \p Len characters from \p Pos.
///
/// A non-positive \p Len yields 0 without touching \p Pos. The scan never
/// forms a pointer outside [Pos, Pos + Len].
inline unsigned CountNewLines(const char *Pos, int Len) {
  if (Len <= 0)
    return 0;
  const char *const End = Pos + Len;
  unsigned Lines = 0;
  while (Pos != End) {
    // Jump straight to the next newline in the remaining range, if any.
    const void *Hit = memchr(Pos, '\n', End - Pos);
    if (!Hit)
      break;
    ++Lines;
    Pos = static_cast<const char *>(Hit) + 1;
  }
  return Lines;
}
186
187/// Detect the likely line ending style of \p FromFile by examining the first
188/// newline found within it.
189static StringRef DetectEOL(const MemoryBuffer &FromFile) {
190 // detect what line endings the file uses, so that added content does not mix
191 // the style
192 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
193 if (Pos == NULL)
194 return "\n";
195 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
196 return "\n\r";
197 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
198 return "\r\n";
199 return "\n";
200}
201
202/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
203/// \p WriteTo - 1.
204void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
205 unsigned &WriteFrom, unsigned WriteTo,
206 StringRef EOL, int &Line,
207 bool EnsureNewline) {
208 if (WriteTo <= WriteFrom)
209 return;
210 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
211 // count lines manually, it's faster than getPresumedLoc()
212 Line += CountNewLines(FromFile.getBufferStart() + WriteFrom,
213 WriteTo - WriteFrom);
214 if (EnsureNewline) {
215 char LastChar = FromFile.getBufferStart()[WriteTo - 1];
216 if (LastChar != '\n' && LastChar != '\r')
217 OS << EOL;
218 }
219 WriteFrom = WriteTo;
220}
221
222/// Print characters from \p FromFile starting at \p NextToWrite up until the
223/// inclusion directive at \p StartToken, then print out the inclusion
224/// inclusion directive disabled by a #if directive, updating \p NextToWrite
225/// and \p Line to track the number of source lines visited and the progress
226/// through the \p FromFile buffer.
227void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
228 const Token &StartToken,
229 const MemoryBuffer &FromFile,
230 StringRef EOL,
231 unsigned &NextToWrite, int &Line) {
232 OutputContentUpTo(FromFile, NextToWrite,
233 SM.getFileOffset(StartToken.getLocation()), EOL, Line);
234 Token DirectiveToken;
235 do {
236 DirectiveLex.LexFromRawLexer(DirectiveToken);
237 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
238 OS << "#if 0 /* expanded by -rewrite-includes */" << EOL;
239 OutputContentUpTo(FromFile, NextToWrite,
240 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
241 EOL, Line);
242 OS << "#endif /* expanded by -rewrite-includes */" << EOL;
243}
244
245/// Find the next identifier in the pragma directive specified by \p RawToken.
246StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
247 Token &RawToken) {
248 RawLex.LexFromRawLexer(RawToken);
249 if (RawToken.is(tok::raw_identifier))
250 PP.LookUpIdentifierInfo(RawToken);
251 if (RawToken.is(tok::identifier))
252 return RawToken.getIdentifierInfo()->getName();
253 return StringRef();
254}
255
256/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
257/// and including content of included files recursively.
258bool InclusionRewriter::Process(FileID FileId,
259 SrcMgr::CharacteristicKind FileType)
260{
261 bool Invalid;
262 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
263 assert(!Invalid && "Invalid FileID while trying to rewrite includes");
264 const char *FileName = FromFile.getBufferIdentifier();
265 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
266 RawLex.SetCommentRetentionState(false);
267
268 StringRef EOL = DetectEOL(FromFile);
269
270 // Per the GNU docs: "1" indicates the start of a new file.
271 WriteLineInfo(FileName, 1, FileType, EOL, " 1");
272
273 if (SM.getFileIDSize(FileId) == 0)
274 return true;
275
276 // The next byte to be copied from the source file
277 unsigned NextToWrite = 0;
278 int Line = 1; // The current input file line number.
279
280 Token RawToken;
281 RawLex.LexFromRawLexer(RawToken);
282
283 // TODO: Consider adding a switch that strips possibly unimportant content,
284 // such as comments, to reduce the size of repro files.
285 while (RawToken.isNot(tok::eof)) {
286 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
287 RawLex.setParsingPreprocessorDirective(true);
288 Token HashToken = RawToken;
289 RawLex.LexFromRawLexer(RawToken);
290 if (RawToken.is(tok::raw_identifier))
291 PP.LookUpIdentifierInfo(RawToken);
292 if (RawToken.is(tok::identifier)) {
293 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
294 case tok::pp_include:
295 case tok::pp_include_next:
296 case tok::pp_import: {
297 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
298 Line);
299 if (const FileChange *Change = FindFileChangeLocation(
300 HashToken.getLocation())) {
301 // now include and recursively process the file
302 if (Process(Change->Id, Change->FileType))
303 // and set lineinfo back to this file, if the nested one was
304 // actually included
305 // `2' indicates returning to a file (after having included
306 // another file.
307 WriteLineInfo(FileName, Line, FileType, EOL, " 2");
308 } else
309 // fix up lineinfo (since commented out directive changed line
310 // numbers) for inclusions that were skipped due to header guards
311 WriteLineInfo(FileName, Line, FileType, EOL);
312 break;
313 }
314 case tok::pp_pragma: {
315 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
316 if (Identifier == "clang" || Identifier == "GCC") {
317 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
318 // keep the directive in, commented out
319 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
320 NextToWrite, Line);
321 // update our own type
322 FileType = SM.getFileCharacteristic(RawToken.getLocation());
323 WriteLineInfo(FileName, Line, FileType, EOL);
324 }
325 } else if (Identifier == "once") {
326 // keep the directive in, commented out
327 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
328 NextToWrite, Line);
329 WriteLineInfo(FileName, Line, FileType, EOL);
330 }
331 break;
332 }
333 default:
334 break;
335 }
336 }
337 RawLex.setParsingPreprocessorDirective(false);
338 }
339 RawLex.LexFromRawLexer(RawToken);
340 }
341 OutputContentUpTo(FromFile, NextToWrite,
342 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
343 /*EnsureNewline*/true);
344 return true;
345}
346
347/// InclusionRewriterInInput - Implement -rewrite-includes mode.
348void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
349 const PreprocessorOutputOptions &Opts) {
350 SourceManager &SM = PP.getSourceManager();
351 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
352 Opts.ShowLineMarkers);
353 PP.addPPCallbacks(Rewrite);
354
355 // First let the preprocessor process the entire file and call callbacks.
356 // Callbacks will record which #include's were actually performed.
357 PP.EnterMainSourceFile();
358 Token Tok;
359 // Only preprocessor directives matter here, so disable macro expansion
360 // everywhere else as an optimization.
361 // TODO: It would be even faster if the preprocessor could be switched
362 // to a mode where it would parse only preprocessor directives and comments,
363 // nothing else matters for parsing or processing.
364 PP.SetMacroExpansionOnlyInDirectives();
365 do {
366 PP.Lex(Tok);
367 } while (Tok.isNot(tok::eof));
368 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
369 OS->flush();
370}