blob: 0f6ebabe373e590798fb642a34cc1670767598c6 [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner959e5be2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner4b009652007-07-25 00:24:17 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the Preprocessor interface.
11//
12//===----------------------------------------------------------------------===//
13//
14// Options to support:
15// -H - Print the name of each header file used.
16// -d[MDNI] - Dump various things.
17// -fworking-directory - #line's with preprocessor's working dir.
18// -fpreprocessed
19// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20// -W*
21// -w
22//
23// Messages to emit:
24// "Multiple include guards may be useful for:\n"
25//
26//===----------------------------------------------------------------------===//
27
28#include "clang/Lex/Preprocessor.h"
29#include "clang/Lex/HeaderSearch.h"
30#include "clang/Lex/MacroInfo.h"
31#include "clang/Lex/PPCallbacks.h"
32#include "clang/Lex/Pragma.h"
33#include "clang/Lex/ScratchBuffer.h"
34#include "clang/Basic/Diagnostic.h"
35#include "clang/Basic/FileManager.h"
36#include "clang/Basic/SourceManager.h"
37#include "clang/Basic/TargetInfo.h"
38#include "llvm/ADT/SmallVector.h"
39#include "llvm/Support/MemoryBuffer.h"
Ted Kremenekce4c64e2008-01-14 16:44:48 +000040#include "llvm/Support/Streams.h"
Chris Lattner1b023182007-09-03 18:30:32 +000041#include <ctime>
Chris Lattner4b009652007-07-25 00:24:17 +000042using namespace clang;
43
44//===----------------------------------------------------------------------===//
45
46Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
47 TargetInfo &target, SourceManager &SM,
48 HeaderSearch &Headers)
49 : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()),
50 SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts),
51 CurLexer(0), CurDirLookup(0), CurMacroExpander(0), Callbacks(0) {
52 ScratchBuf = new ScratchBuffer(SourceMgr);
53
54 // Clear stats.
55 NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
56 NumIf = NumElse = NumEndif = 0;
57 NumEnteredSourceFiles = 0;
58 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
59 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
60 MaxIncludeStackDepth = 0;
61 NumSkipped = 0;
62
63 // Default to discarding comments.
64 KeepComments = false;
65 KeepMacroComments = false;
66
67 // Macro expansion is enabled.
68 DisableMacroExpansion = false;
69 InMacroArgs = false;
70 NumCachedMacroExpanders = 0;
71
72 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
73 // This gets unpoisoned where it is allowed.
74 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
75
Chris Lattnerd1f21e12007-10-09 22:10:18 +000076 Predefines = 0;
77
Chris Lattner4b009652007-07-25 00:24:17 +000078 // Initialize the pragma handlers.
79 PragmaHandlers = new PragmaNamespace(0);
80 RegisterBuiltinPragmas();
81
82 // Initialize builtin macros like __LINE__ and friends.
83 RegisterBuiltinMacros();
84}
85
86Preprocessor::~Preprocessor() {
87 // Free any active lexers.
88 delete CurLexer;
89
90 while (!IncludeMacroStack.empty()) {
91 delete IncludeMacroStack.back().TheLexer;
92 delete IncludeMacroStack.back().TheMacroExpander;
93 IncludeMacroStack.pop_back();
94 }
Chris Lattner7a1b0882007-10-07 08:44:20 +000095
96 // Free any macro definitions.
97 for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
98 Macros.begin(), E = Macros.end(); I != E; ++I) {
99 // Free the macro definition.
100 delete I->second;
101 I->second = 0;
102 I->first->setHasMacroDefinition(false);
103 }
Chris Lattner4b009652007-07-25 00:24:17 +0000104
105 // Free any cached macro expanders.
106 for (unsigned i = 0, e = NumCachedMacroExpanders; i != e; ++i)
107 delete MacroExpanderCache[i];
108
109 // Release pragma information.
110 delete PragmaHandlers;
111
112 // Delete the scratch buffer info.
113 delete ScratchBuf;
114}
115
116PPCallbacks::~PPCallbacks() {
117}
118
119/// Diag - Forwarding function for diagnostics. This emits a diagnostic at
120/// the specified Token's location, translating the token's start
121/// position in the current buffer into a SourcePosition object for rendering.
122void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) {
Ted Kremenekd7f64cd2007-12-12 22:39:36 +0000123 Diags.Report(getFullLoc(Loc), DiagID);
Chris Lattner4b009652007-07-25 00:24:17 +0000124}
125
126void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID,
127 const std::string &Msg) {
Ted Kremenekd7f64cd2007-12-12 22:39:36 +0000128 Diags.Report(getFullLoc(Loc), DiagID, &Msg, 1);
Chris Lattner4b009652007-07-25 00:24:17 +0000129}
130
131void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000132 llvm::cerr << tok::getTokenName(Tok.getKind()) << " '"
133 << getSpelling(Tok) << "'";
Chris Lattner4b009652007-07-25 00:24:17 +0000134
135 if (!DumpFlags) return;
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000136
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000137 llvm::cerr << "\t";
Chris Lattner4b009652007-07-25 00:24:17 +0000138 if (Tok.isAtStartOfLine())
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000139 llvm::cerr << " [StartOfLine]";
Chris Lattner4b009652007-07-25 00:24:17 +0000140 if (Tok.hasLeadingSpace())
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000141 llvm::cerr << " [LeadingSpace]";
Chris Lattner4b009652007-07-25 00:24:17 +0000142 if (Tok.isExpandDisabled())
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000143 llvm::cerr << " [ExpandDisabled]";
Chris Lattner4b009652007-07-25 00:24:17 +0000144 if (Tok.needsCleaning()) {
145 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000146 llvm::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength())
147 << "']";
Chris Lattner4b009652007-07-25 00:24:17 +0000148 }
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000149
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000150 llvm::cerr << "\tLoc=<";
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000151 DumpLocation(Tok.getLocation());
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000152 llvm::cerr << ">";
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000153}
154
155void Preprocessor::DumpLocation(SourceLocation Loc) const {
156 SourceLocation LogLoc = SourceMgr.getLogicalLoc(Loc);
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000157 llvm::cerr << SourceMgr.getSourceName(LogLoc) << ':'
158 << SourceMgr.getLineNumber(LogLoc) << ':'
159 << SourceMgr.getLineNumber(LogLoc);
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000160
161 SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(Loc);
162 if (PhysLoc != LogLoc) {
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000163 llvm::cerr << " <PhysLoc=";
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000164 DumpLocation(PhysLoc);
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000165 llvm::cerr << ">";
Chris Lattnerc0f7c512007-12-09 20:31:55 +0000166 }
Chris Lattner4b009652007-07-25 00:24:17 +0000167}
168
169void Preprocessor::DumpMacro(const MacroInfo &MI) const {
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000170 llvm::cerr << "MACRO: ";
Chris Lattner4b009652007-07-25 00:24:17 +0000171 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
172 DumpToken(MI.getReplacementToken(i));
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000173 llvm::cerr << " ";
Chris Lattner4b009652007-07-25 00:24:17 +0000174 }
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000175 llvm::cerr << "\n";
Chris Lattner4b009652007-07-25 00:24:17 +0000176}
177
178void Preprocessor::PrintStats() {
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000179 llvm::cerr << "\n*** Preprocessor Stats:\n";
180 llvm::cerr << NumDirectives << " directives found:\n";
181 llvm::cerr << " " << NumDefined << " #define.\n";
182 llvm::cerr << " " << NumUndefined << " #undef.\n";
183 llvm::cerr << " #include/#include_next/#import:\n";
184 llvm::cerr << " " << NumEnteredSourceFiles << " source files entered.\n";
185 llvm::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n";
186 llvm::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n";
187 llvm::cerr << " " << NumElse << " #else/#elif.\n";
188 llvm::cerr << " " << NumEndif << " #endif.\n";
189 llvm::cerr << " " << NumPragma << " #pragma.\n";
190 llvm::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
Chris Lattner4b009652007-07-25 00:24:17 +0000191
Ted Kremenekce4c64e2008-01-14 16:44:48 +0000192 llvm::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
193 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
194 << NumFastMacroExpanded << " on the fast path.\n";
195 llvm::cerr << (NumFastTokenPaste+NumTokenPaste)
196 << " token paste (##) operations performed, "
197 << NumFastTokenPaste << " on the fast path.\n";
Chris Lattner4b009652007-07-25 00:24:17 +0000198}
199
200//===----------------------------------------------------------------------===//
201// Token Spelling
202//===----------------------------------------------------------------------===//
203
204
205/// getSpelling() - Return the 'spelling' of this token. The spelling of a
206/// token are the characters used to represent the token in the source file
207/// after trigraph expansion and escaped-newline folding. In particular, this
208/// wants to get the true, uncanonicalized, spelling of things like digraphs
209/// UCNs, etc.
210std::string Preprocessor::getSpelling(const Token &Tok) const {
211 assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
212
213 // If this token contains nothing interesting, return it directly.
214 const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
215 if (!Tok.needsCleaning())
216 return std::string(TokStart, TokStart+Tok.getLength());
217
218 std::string Result;
219 Result.reserve(Tok.getLength());
220
221 // Otherwise, hard case, relex the characters into the string.
222 for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
223 Ptr != End; ) {
224 unsigned CharSize;
225 Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
226 Ptr += CharSize;
227 }
228 assert(Result.size() != unsigned(Tok.getLength()) &&
229 "NeedsCleaning flag set on something that didn't need cleaning!");
230 return Result;
231}
232
233/// getSpelling - This method is used to get the spelling of a token into a
234/// preallocated buffer, instead of as an std::string. The caller is required
235/// to allocate enough space for the token, which is guaranteed to be at least
236/// Tok.getLength() bytes long. The actual length of the token is returned.
237///
238/// Note that this method may do two possible things: it may either fill in
239/// the buffer specified with characters, or it may *change the input pointer*
240/// to point to a constant buffer with the data already in it (avoiding a
241/// copy). The caller is not allowed to modify the returned buffer pointer
242/// if an internal buffer is returned.
243unsigned Preprocessor::getSpelling(const Token &Tok,
244 const char *&Buffer) const {
245 assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
246
247 // If this token is an identifier, just return the string from the identifier
248 // table, which is very quick.
249 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
250 Buffer = II->getName();
251
252 // Return the length of the token. If the token needed cleaning, don't
253 // include the size of the newlines or trigraphs in it.
254 if (!Tok.needsCleaning())
255 return Tok.getLength();
256 else
257 return strlen(Buffer);
258 }
259
260 // Otherwise, compute the start of the token in the input lexer buffer.
261 const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
262
263 // If this token contains nothing interesting, return it directly.
264 if (!Tok.needsCleaning()) {
265 Buffer = TokStart;
266 return Tok.getLength();
267 }
268 // Otherwise, hard case, relex the characters into the string.
269 char *OutBuf = const_cast<char*>(Buffer);
270 for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
271 Ptr != End; ) {
272 unsigned CharSize;
273 *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
274 Ptr += CharSize;
275 }
276 assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
277 "NeedsCleaning flag set on something that didn't need cleaning!");
278
279 return OutBuf-Buffer;
280}
281
282
283/// CreateString - Plop the specified string into a scratch buffer and return a
284/// location for it. If specified, the source location provides a source
285/// location for the token.
286SourceLocation Preprocessor::
287CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
288 if (SLoc.isValid())
289 return ScratchBuf->getToken(Buf, Len, SLoc);
290 return ScratchBuf->getToken(Buf, Len);
291}
292
293
294/// AdvanceToTokenCharacter - Given a location that specifies the start of a
295/// token, return a new location that specifies a character within the token.
296SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart,
297 unsigned CharNo) {
298 // If they request the first char of the token, we're trivially done. If this
299 // is a macro expansion, it doesn't make sense to point to a character within
300 // the instantiation point (the name). We could point to the source
301 // character, but without also pointing to instantiation info, this is
302 // confusing.
303 if (CharNo == 0 || TokStart.isMacroID()) return TokStart;
304
305 // Figure out how many physical characters away the specified logical
306 // character is. This needs to take into consideration newlines and
307 // trigraphs.
308 const char *TokPtr = SourceMgr.getCharacterData(TokStart);
309 unsigned PhysOffset = 0;
310
311 // The usual case is that tokens don't contain anything interesting. Skip
312 // over the uninteresting characters. If a token only consists of simple
313 // chars, this method is extremely fast.
314 while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr))
315 ++TokPtr, --CharNo, ++PhysOffset;
316
317 // If we have a character that may be a trigraph or escaped newline, create a
318 // lexer to parse it correctly.
319 if (CharNo != 0) {
320 // Create a lexer starting at this token position.
321 Lexer TheLexer(TokStart, *this, TokPtr);
322 Token Tok;
323 // Skip over characters the remaining characters.
324 const char *TokStartPtr = TokPtr;
325 for (; CharNo; --CharNo)
326 TheLexer.getAndAdvanceChar(TokPtr, Tok);
327
328 PhysOffset += TokPtr-TokStartPtr;
329 }
330
331 return TokStart.getFileLocWithOffset(PhysOffset);
332}
333
334
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000335//===----------------------------------------------------------------------===//
336// Preprocessor Initialization Methods
337//===----------------------------------------------------------------------===//
338
// Append one directive line for Macro to Buf.  The line starts with Command
// (by default "#define ").  Macro is either of the form "XXX", which yields
// "#define XXX 1", or of the form "XXX=Y z W", which yields
// "#define XXX Y z W" (only the first '=' is treated as the separator).  To
// get a #define with no value, use "XXX=".  The line is newline terminated.
static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro,
                               const char *Command = "#define ") {
  const char *const MacroEnd = Macro + strlen(Macro);
  Buf.insert(Buf.end(), Command, Command + strlen(Command));

  const char *const Equal = strchr(Macro, '=');
  if (Equal == 0) {
    // No explicit value: emit "macroname 1".
    static const char DefaultValue[] = " 1";
    Buf.insert(Buf.end(), Macro, MacroEnd);
    Buf.insert(Buf.end(), DefaultValue, DefaultValue + 2);
  } else {
    // Emit the name, a space in place of the '=', then the value.
    Buf.insert(Buf.end(), Macro, Equal);
    Buf.push_back(' ');
    Buf.insert(Buf.end(), Equal + 1, MacroEnd);
  }

  Buf.push_back('\n');
}
358
359
360static void InitializePredefinedMacros(Preprocessor &PP,
361 std::vector<char> &Buf) {
362 // FIXME: Implement magic like cpp_init_builtins for things like __STDC__
363 // and __DATE__ etc.
364#if 0
365 /* __STDC__ has the value 1 under normal circumstances.
366 However, if (a) we are in a system header, (b) the option
367 stdc_0_in_system_headers is true (set by target config), and
368 (c) we are not in strictly conforming mode, then it has the
369 value 0. (b) and (c) are already checked in cpp_init_builtins. */
370 //case BT_STDC:
371 if (cpp_in_system_header (pfile))
372 number = 0;
373 else
374 number = 1;
375 break;
376#endif
377 // These should all be defined in the preprocessor according to the
378 // current language configuration.
379 DefineBuiltinMacro(Buf, "__STDC__=1");
380 //DefineBuiltinMacro(Buf, "__ASSEMBLER__=1");
381 if (PP.getLangOptions().C99 && !PP.getLangOptions().CPlusPlus)
382 DefineBuiltinMacro(Buf, "__STDC_VERSION__=199901L");
383 else if (0) // STDC94 ?
384 DefineBuiltinMacro(Buf, "__STDC_VERSION__=199409L");
385
386 DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1");
387 if (PP.getLangOptions().ObjC1)
388 DefineBuiltinMacro(Buf, "__OBJC__=1");
389 if (PP.getLangOptions().ObjC2)
390 DefineBuiltinMacro(Buf, "__OBJC2__=1");
Steve Naroffae84af82007-10-31 18:42:27 +0000391
Chris Lattner77cec472007-10-10 17:48:53 +0000392 // Add __builtin_va_list typedef.
393 {
394 const char *VAList = PP.getTargetInfo().getVAListDeclaration();
395 Buf.insert(Buf.end(), VAList, VAList+strlen(VAList));
396 Buf.push_back('\n');
397 }
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000398
399 // Get the target #defines.
400 PP.getTargetInfo().getTargetDefines(Buf);
401
402 // Compiler set macros.
403 DefineBuiltinMacro(Buf, "__APPLE_CC__=5250");
Steve Naroffb5a086e2007-11-10 18:06:36 +0000404 DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1050");
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000405 DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0");
406 DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1");
407 DefineBuiltinMacro(Buf, "__GNUC__=4");
408 DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002");
409 DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. "
410 "build 5250)\"");
411
412 // Build configuration options.
413 DefineBuiltinMacro(Buf, "__DYNAMIC__=1");
414 DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0");
415 DefineBuiltinMacro(Buf, "__NO_INLINE__=1");
416 DefineBuiltinMacro(Buf, "__PIC__=1");
417
418
419 if (PP.getLangOptions().CPlusPlus) {
420 DefineBuiltinMacro(Buf, "__DEPRECATED=1");
421 DefineBuiltinMacro(Buf, "__EXCEPTIONS=1");
422 DefineBuiltinMacro(Buf, "__GNUG__=4");
423 DefineBuiltinMacro(Buf, "__GXX_WEAK__=1");
424 DefineBuiltinMacro(Buf, "__cplusplus=1");
425 DefineBuiltinMacro(Buf, "__private_extern__=extern");
426 }
Steve Naroff73a07032008-02-07 03:50:06 +0000427 if (PP.getLangOptions().Microsoft) {
428 DefineBuiltinMacro(Buf, "__stdcall=");
429 DefineBuiltinMacro(Buf, "__cdecl=");
430 DefineBuiltinMacro(Buf, "_cdecl=");
431 DefineBuiltinMacro(Buf, "__ptr64=");
Steve Naroffbe880ec2008-02-07 23:24:32 +0000432 DefineBuiltinMacro(Buf, "__w64=");
Steve Naroff73a07032008-02-07 03:50:06 +0000433 DefineBuiltinMacro(Buf, "__forceinline=");
Steve Narofff9bba132008-02-07 15:26:07 +0000434 DefineBuiltinMacro(Buf, "__int8=char");
435 DefineBuiltinMacro(Buf, "__int16=short");
436 DefineBuiltinMacro(Buf, "__int32=int");
Chris Lattnerd1a552b2008-02-10 21:12:45 +0000437 DefineBuiltinMacro(Buf, "__int64=long long");
Steve Naroffcfe78212008-02-11 22:29:58 +0000438 DefineBuiltinMacro(Buf, "__declspec(X)=");
Steve Naroff73a07032008-02-07 03:50:06 +0000439 }
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000440 // FIXME: Should emit a #line directive here.
441}
442
443
444/// EnterMainSourceFile - Enter the specified FileID as the main source file,
Nate Begeman886bf132008-01-07 04:01:26 +0000445/// which implicitly adds the builtin defines etc.
Ted Kremenek17861c52007-12-19 22:51:13 +0000446void Preprocessor::EnterMainSourceFile() {
447
448 unsigned MainFileID = SourceMgr.getMainFileID();
449
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000450 // Enter the main file source buffer.
451 EnterSourceFile(MainFileID, 0);
452
Chris Lattnerb45f05c2007-11-15 19:07:47 +0000453 // Tell the header info that the main file was entered. If the file is later
454 // #imported, it won't be re-entered.
455 if (const FileEntry *FE =
456 SourceMgr.getFileEntryForLoc(SourceLocation::getFileLoc(MainFileID, 0)))
457 HeaderInfo.IncrementIncludeCount(FE);
458
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000459 std::vector<char> PrologFile;
460 PrologFile.reserve(4080);
461
462 // Install things like __POWERPC__, __GNUC__, etc into the macro table.
463 InitializePredefinedMacros(*this, PrologFile);
464
465 // Add on the predefines from the driver.
466 PrologFile.insert(PrologFile.end(), Predefines,Predefines+strlen(Predefines));
467
468 // Memory buffer must end with a null byte!
469 PrologFile.push_back(0);
470
471 // Now that we have emitted the predefined macros, #includes, etc into
472 // PrologFile, preprocess it to populate the initial preprocessor state.
473 llvm::MemoryBuffer *SB =
474 llvm::MemoryBuffer::getMemBufferCopy(&PrologFile.front(),&PrologFile.back(),
475 "<predefines>");
476 assert(SB && "Cannot fail to create predefined source buffer");
477 unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB);
478 assert(FileID && "Could not create FileID for predefines?");
479
480 // Start parsing the predefines.
481 EnterSourceFile(FileID, 0);
482}
Chris Lattner4b009652007-07-25 00:24:17 +0000483
484//===----------------------------------------------------------------------===//
485// Source File Location Methods.
486//===----------------------------------------------------------------------===//
487
488/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
489/// return null on failure. isAngled indicates whether the file reference is
490/// for system #include's or not (i.e. using <> instead of "").
491const FileEntry *Preprocessor::LookupFile(const char *FilenameStart,
492 const char *FilenameEnd,
493 bool isAngled,
494 const DirectoryLookup *FromDir,
495 const DirectoryLookup *&CurDir) {
496 // If the header lookup mechanism may be relative to the current file, pass in
497 // info about where the current file is.
498 const FileEntry *CurFileEnt = 0;
499 if (!FromDir) {
500 SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
501 CurFileEnt = SourceMgr.getFileEntryForLoc(FileLoc);
502 }
503
504 // Do a standard file entry lookup.
505 CurDir = CurDirLookup;
506 const FileEntry *FE =
507 HeaderInfo.LookupFile(FilenameStart, FilenameEnd,
508 isAngled, FromDir, CurDir, CurFileEnt);
509 if (FE) return FE;
510
511 // Otherwise, see if this is a subframework header. If so, this is relative
512 // to one of the headers on the #include stack. Walk the list of the current
513 // headers on the #include stack and pass them to HeaderInfo.
514 if (CurLexer && !CurLexer->Is_PragmaLexer) {
Chris Lattner017d65b2008-02-01 05:34:02 +0000515 if ((CurFileEnt = SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc())))
516 if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd,
517 CurFileEnt)))
518 return FE;
Chris Lattner4b009652007-07-25 00:24:17 +0000519 }
520
521 for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
522 IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1];
523 if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) {
Chris Lattner017d65b2008-02-01 05:34:02 +0000524 if ((CurFileEnt =
525 SourceMgr.getFileEntryForLoc(ISEntry.TheLexer->getFileLoc())))
526 if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart,
527 FilenameEnd, CurFileEnt)))
528 return FE;
Chris Lattner4b009652007-07-25 00:24:17 +0000529 }
530 }
531
532 // Otherwise, we really couldn't find the file.
533 return 0;
534}
535
536/// isInPrimaryFile - Return true if we're in the top-level file, not in a
537/// #include.
538bool Preprocessor::isInPrimaryFile() const {
539 if (CurLexer && !CurLexer->Is_PragmaLexer)
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000540 return IncludeMacroStack.empty();
Chris Lattner4b009652007-07-25 00:24:17 +0000541
542 // If there are any stacked lexers, we're in a #include.
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000543 assert(IncludeMacroStack[0].TheLexer &&
544 !IncludeMacroStack[0].TheLexer->Is_PragmaLexer &&
545 "Top level include stack isn't our primary lexer?");
546 for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
Chris Lattner4b009652007-07-25 00:24:17 +0000547 if (IncludeMacroStack[i].TheLexer &&
548 !IncludeMacroStack[i].TheLexer->Is_PragmaLexer)
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000549 return false;
550 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000551}
552
553/// getCurrentLexer - Return the current file lexer being lexed from. Note
554/// that this ignores any potentially active macro expansions and _Pragma
555/// expansions going on at the time.
556Lexer *Preprocessor::getCurrentFileLexer() const {
557 if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer;
558
559 // Look for a stacked lexer.
560 for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
561 Lexer *L = IncludeMacroStack[i-1].TheLexer;
562 if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions.
563 return L;
564 }
565 return 0;
566}
567
568
569/// EnterSourceFile - Add a source file to the top of the include stack and
570/// start lexing tokens from it instead of the current buffer. Return true
571/// on failure.
572void Preprocessor::EnterSourceFile(unsigned FileID,
Chris Lattnerd1f21e12007-10-09 22:10:18 +0000573 const DirectoryLookup *CurDir) {
Chris Lattner4b009652007-07-25 00:24:17 +0000574 assert(CurMacroExpander == 0 && "Cannot #include a file inside a macro!");
575 ++NumEnteredSourceFiles;
576
577 if (MaxIncludeStackDepth < IncludeMacroStack.size())
578 MaxIncludeStackDepth = IncludeMacroStack.size();
579
580 Lexer *TheLexer = new Lexer(SourceLocation::getFileLoc(FileID, 0), *this);
Chris Lattner4b009652007-07-25 00:24:17 +0000581 EnterSourceFileWithLexer(TheLexer, CurDir);
582}
583
584/// EnterSourceFile - Add a source file to the top of the include stack and
585/// start lexing tokens from it instead of the current buffer.
586void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
587 const DirectoryLookup *CurDir) {
588
589 // Add the current lexer to the include stack.
590 if (CurLexer || CurMacroExpander)
591 IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
592 CurMacroExpander));
593
594 CurLexer = TheLexer;
595 CurDirLookup = CurDir;
596 CurMacroExpander = 0;
597
598 // Notify the client, if desired, that we are in a new source file.
599 if (Callbacks && !CurLexer->Is_PragmaLexer) {
600 DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
601
602 // Get the file entry for the current file.
603 if (const FileEntry *FE =
604 SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
605 FileType = HeaderInfo.getFileDirFlavor(FE);
606
607 Callbacks->FileChanged(CurLexer->getFileLoc(),
608 PPCallbacks::EnterFile, FileType);
609 }
610}
611
612
613
614/// EnterMacro - Add a Macro to the top of the include stack and start lexing
615/// tokens from it instead of the current buffer.
616void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
617 IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
618 CurMacroExpander));
619 CurLexer = 0;
620 CurDirLookup = 0;
621
622 if (NumCachedMacroExpanders == 0) {
623 CurMacroExpander = new MacroExpander(Tok, Args, *this);
624 } else {
625 CurMacroExpander = MacroExpanderCache[--NumCachedMacroExpanders];
626 CurMacroExpander->Init(Tok, Args);
627 }
628}
629
630/// EnterTokenStream - Add a "macro" context to the top of the include stack,
631/// which will cause the lexer to start returning the specified tokens. Note
632/// that these tokens will be re-macro-expanded when/if expansion is enabled.
633/// This method assumes that the specified stream of tokens has a permanent
634/// owner somewhere, so they do not need to be copied.
635void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks) {
636 // Save our current state.
637 IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
638 CurMacroExpander));
639 CurLexer = 0;
640 CurDirLookup = 0;
641
642 // Create a macro expander to expand from the specified token stream.
643 if (NumCachedMacroExpanders == 0) {
644 CurMacroExpander = new MacroExpander(Toks, NumToks, *this);
645 } else {
646 CurMacroExpander = MacroExpanderCache[--NumCachedMacroExpanders];
647 CurMacroExpander->Init(Toks, NumToks);
648 }
649}
650
651/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
652/// lexer stack. This should only be used in situations where the current
653/// state of the top-of-stack lexer is known.
654void Preprocessor::RemoveTopOfLexerStack() {
655 assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
656
657 if (CurMacroExpander) {
658 // Delete or cache the now-dead macro expander.
659 if (NumCachedMacroExpanders == MacroExpanderCacheSize)
660 delete CurMacroExpander;
661 else
662 MacroExpanderCache[NumCachedMacroExpanders++] = CurMacroExpander;
663 } else {
664 delete CurLexer;
665 }
666 CurLexer = IncludeMacroStack.back().TheLexer;
667 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
668 CurMacroExpander = IncludeMacroStack.back().TheMacroExpander;
669 IncludeMacroStack.pop_back();
670}
671
672//===----------------------------------------------------------------------===//
673// Macro Expansion Handling.
674//===----------------------------------------------------------------------===//
675
Chris Lattner7a1b0882007-10-07 08:44:20 +0000676/// setMacroInfo - Specify a macro for this identifier.
677///
678void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
679 if (MI == 0) {
680 if (II->hasMacroDefinition()) {
681 Macros.erase(II);
682 II->setHasMacroDefinition(false);
683 }
684 } else {
685 Macros[II] = MI;
686 II->setHasMacroDefinition(true);
687 }
688}
689
Chris Lattner4b009652007-07-25 00:24:17 +0000690/// RegisterBuiltinMacro - Register the specified identifier in the identifier
691/// table and mark it as a builtin macro to be expanded.
692IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) {
693 // Get the identifier.
694 IdentifierInfo *Id = getIdentifierInfo(Name);
695
696 // Mark it as being a macro that is builtin.
697 MacroInfo *MI = new MacroInfo(SourceLocation());
698 MI->setIsBuiltinMacro();
Chris Lattner7a1b0882007-10-07 08:44:20 +0000699 setMacroInfo(Id, MI);
Chris Lattner4b009652007-07-25 00:24:17 +0000700 return Id;
701}
702
703
704/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
705/// identifier table.
706void Preprocessor::RegisterBuiltinMacros() {
707 Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
708 Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
709 Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
710 Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
711 Ident_Pragma = RegisterBuiltinMacro("_Pragma");
712
713 // GCC Extensions.
714 Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
715 Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
716 Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
717}
718
719/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
720/// in its expansion, currently expands to that token literally.
721static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
Chris Lattner7a1b0882007-10-07 08:44:20 +0000722 const IdentifierInfo *MacroIdent,
723 Preprocessor &PP) {
Chris Lattner4b009652007-07-25 00:24:17 +0000724 IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
725
726 // If the token isn't an identifier, it's always literally expanded.
727 if (II == 0) return true;
728
729 // If the identifier is a macro, and if that macro is enabled, it may be
730 // expanded so it's not a trivial expansion.
Chris Lattner7a1b0882007-10-07 08:44:20 +0000731 if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
Chris Lattner4b009652007-07-25 00:24:17 +0000732 // Fast expanding "#define X X" is ok, because X would be disabled.
733 II != MacroIdent)
734 return false;
735
736 // If this is an object-like macro invocation, it is safe to trivially expand
737 // it.
738 if (MI->isObjectLike()) return true;
739
740 // If this is a function-like macro invocation, it's safe to trivially expand
741 // as long as the identifier is not a macro argument.
742 for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
743 I != E; ++I)
744 if (*I == II)
745 return false; // Identifier is a macro argument.
746
747 return true;
748}
749
750
751/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
752/// lexed is a '('. If so, consume the token and return true, if not, this
753/// method should have no observable side-effect on the lexed tokens.
754bool Preprocessor::isNextPPTokenLParen() {
755 // Do some quick tests for rejection cases.
756 unsigned Val;
757 if (CurLexer)
758 Val = CurLexer->isNextPPTokenLParen();
759 else
760 Val = CurMacroExpander->isNextTokenLParen();
761
762 if (Val == 2) {
763 // We have run off the end. If it's a source file we don't
764 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
765 // macro stack.
766 if (CurLexer)
767 return false;
768 for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
769 IncludeStackInfo &Entry = IncludeMacroStack[i-1];
770 if (Entry.TheLexer)
771 Val = Entry.TheLexer->isNextPPTokenLParen();
772 else
773 Val = Entry.TheMacroExpander->isNextTokenLParen();
774
775 if (Val != 2)
776 break;
777
778 // Ran off the end of a source file?
779 if (Entry.TheLexer)
780 return false;
781 }
782 }
783
784 // Okay, if we know that the token is a '(', lex it and return. Otherwise we
785 // have found something that isn't a '(' or we found the end of the
786 // translation unit. In either case, return false.
787 if (Val != 1)
788 return false;
789
790 Token Tok;
791 LexUnexpandedToken(Tok);
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000792 assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
Chris Lattner4b009652007-07-25 00:24:17 +0000793 return true;
794}
795
796/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
797/// expanded as a macro, handle it and return the next token as 'Identifier'.
798bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
799 MacroInfo *MI) {
Chris Lattnerc834ea62008-01-07 19:50:27 +0000800 // If this is a macro exapnsion in the "#if !defined(x)" line for the file,
801 // then the macro could expand to different things in other contexts, we need
802 // to disable the optimization in this case.
803 if (CurLexer) CurLexer->MIOpt.ExpandedMacro();
Chris Lattner4b009652007-07-25 00:24:17 +0000804
805 // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
806 if (MI->isBuiltinMacro()) {
807 ExpandBuiltinMacro(Identifier);
808 return false;
809 }
810
Chris Lattner4b009652007-07-25 00:24:17 +0000811 /// Args - If this is a function-like macro expansion, this contains,
812 /// for each macro argument, the list of tokens that were provided to the
813 /// invocation.
814 MacroArgs *Args = 0;
815
816 // If this is a function-like macro, read the arguments.
817 if (MI->isFunctionLike()) {
818 // C99 6.10.3p10: If the preprocessing token immediately after the the macro
819 // name isn't a '(', this macro should not be expanded. Otherwise, consume
820 // it.
821 if (!isNextPPTokenLParen())
822 return true;
823
824 // Remember that we are now parsing the arguments to a macro invocation.
825 // Preprocessor directives used inside macro arguments are not portable, and
826 // this enables the warning.
827 InMacroArgs = true;
828 Args = ReadFunctionLikeMacroArgs(Identifier, MI);
829
830 // Finished parsing args.
831 InMacroArgs = false;
832
833 // If there was an error parsing the arguments, bail out.
834 if (Args == 0) return false;
835
836 ++NumFnMacroExpanded;
837 } else {
838 ++NumMacroExpanded;
839 }
840
841 // Notice that this macro has been used.
842 MI->setIsUsed(true);
843
844 // If we started lexing a macro, enter the macro expansion body.
845
846 // If this macro expands to no tokens, don't bother to push it onto the
847 // expansion stack, only to take it right back off.
848 if (MI->getNumTokens() == 0) {
849 // No need for arg info.
850 if (Args) Args->destroy();
851
852 // Ignore this macro use, just return the next token in the current
853 // buffer.
854 bool HadLeadingSpace = Identifier.hasLeadingSpace();
855 bool IsAtStartOfLine = Identifier.isAtStartOfLine();
856
857 Lex(Identifier);
858
859 // If the identifier isn't on some OTHER line, inherit the leading
860 // whitespace/first-on-a-line property of this token. This handles
861 // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
862 // empty.
863 if (!Identifier.isAtStartOfLine()) {
864 if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
865 if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
866 }
867 ++NumFastMacroExpanded;
868 return false;
869
870 } else if (MI->getNumTokens() == 1 &&
Chris Lattner7a1b0882007-10-07 08:44:20 +0000871 isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
872 *this)){
Chris Lattner4b009652007-07-25 00:24:17 +0000873 // Otherwise, if this macro expands into a single trivially-expanded
874 // token: expand it now. This handles common cases like
875 // "#define VAL 42".
876
877 // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
878 // identifier to the expanded token.
879 bool isAtStartOfLine = Identifier.isAtStartOfLine();
880 bool hasLeadingSpace = Identifier.hasLeadingSpace();
881
882 // Remember where the token is instantiated.
883 SourceLocation InstantiateLoc = Identifier.getLocation();
884
885 // Replace the result token.
886 Identifier = MI->getReplacementToken(0);
887
888 // Restore the StartOfLine/LeadingSpace markers.
889 Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
890 Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
891
892 // Update the tokens location to include both its logical and physical
893 // locations.
894 SourceLocation Loc =
895 SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc);
896 Identifier.setLocation(Loc);
897
898 // If this is #define X X, we must mark the result as unexpandible.
899 if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
Chris Lattner7a1b0882007-10-07 08:44:20 +0000900 if (getMacroInfo(NewII) == MI)
Chris Lattner4b009652007-07-25 00:24:17 +0000901 Identifier.setFlag(Token::DisableExpand);
902
903 // Since this is not an identifier token, it can't be macro expanded, so
904 // we're done.
905 ++NumFastMacroExpanded;
906 return false;
907 }
908
909 // Start expanding the macro.
910 EnterMacro(Identifier, Args);
911
912 // Now that the macro is at the top of the include stack, ask the
913 // preprocessor to read the next token from it.
914 Lex(Identifier);
915 return false;
916}
917
918/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
919/// invoked to read all of the actual arguments specified for the macro
920/// invocation. This returns null on error.
921MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
922 MacroInfo *MI) {
923 // The number of fixed arguments to parse.
924 unsigned NumFixedArgsLeft = MI->getNumArgs();
925 bool isVariadic = MI->isVariadic();
926
927 // Outer loop, while there are more arguments, keep reading them.
928 Token Tok;
929 Tok.setKind(tok::comma);
930 --NumFixedArgsLeft; // Start reading the first arg.
931
932 // ArgTokens - Build up a list of tokens that make up each argument. Each
933 // argument is separated by an EOF token. Use a SmallVector so we can avoid
934 // heap allocations in the common case.
935 llvm::SmallVector<Token, 64> ArgTokens;
936
937 unsigned NumActuals = 0;
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000938 while (Tok.is(tok::comma)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000939 // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
940 // that we already consumed the first one.
941 unsigned NumParens = 0;
942
943 while (1) {
944 // Read arguments as unexpanded tokens. This avoids issues, e.g., where
945 // an argument value in a macro could expand to ',' or '(' or ')'.
946 LexUnexpandedToken(Tok);
947
Chris Lattnerecdf4f02008-01-22 19:34:51 +0000948 if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
Chris Lattner4b009652007-07-25 00:24:17 +0000949 Diag(MacroName, diag::err_unterm_macro_invoc);
Chris Lattnerecdf4f02008-01-22 19:34:51 +0000950 // Do not lose the EOF/EOM. Return it to the client.
Chris Lattner4b009652007-07-25 00:24:17 +0000951 MacroName = Tok;
952 return 0;
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000953 } else if (Tok.is(tok::r_paren)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000954 // If we found the ) token, the macro arg list is done.
955 if (NumParens-- == 0)
956 break;
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000957 } else if (Tok.is(tok::l_paren)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000958 ++NumParens;
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000959 } else if (Tok.is(tok::comma) && NumParens == 0) {
Chris Lattner4b009652007-07-25 00:24:17 +0000960 // Comma ends this argument if there are more fixed arguments expected.
961 if (NumFixedArgsLeft)
962 break;
963
964 // If this is not a variadic macro, too many args were specified.
965 if (!isVariadic) {
966 // Emit the diagnostic at the macro name in case there is a missing ).
967 // Emitting it at the , could be far away from the macro name.
968 Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
969 return 0;
970 }
971 // Otherwise, continue to add the tokens to this variable argument.
Chris Lattnercb8e41c2007-10-09 18:02:16 +0000972 } else if (Tok.is(tok::comment) && !KeepMacroComments) {
Chris Lattner4b009652007-07-25 00:24:17 +0000973 // If this is a comment token in the argument list and we're just in
974 // -C mode (not -CC mode), discard the comment.
975 continue;
Chris Lattnere373b052007-11-23 06:50:21 +0000976 } else if (Tok.is(tok::identifier)) {
977 // Reading macro arguments can cause macros that we are currently
978 // expanding from to be popped off the expansion stack. Doing so causes
979 // them to be reenabled for expansion. Here we record whether any
980 // identifiers we lex as macro arguments correspond to disabled macros.
981 // If so, we mark the token as noexpand. This is a subtle aspect of
982 // C99 6.10.3.4p2.
983 if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
984 if (!MI->isEnabled())
985 Tok.setFlag(Token::DisableExpand);
Chris Lattner4b009652007-07-25 00:24:17 +0000986 }
987
988 ArgTokens.push_back(Tok);
989 }
990
991 // Empty arguments are standard in C99 and supported as an extension in
992 // other modes.
993 if (ArgTokens.empty() && !Features.C99)
994 Diag(Tok, diag::ext_empty_fnmacro_arg);
995
996 // Add a marker EOF token to the end of the token list for this argument.
997 Token EOFTok;
998 EOFTok.startToken();
999 EOFTok.setKind(tok::eof);
1000 EOFTok.setLocation(Tok.getLocation());
1001 EOFTok.setLength(0);
1002 ArgTokens.push_back(EOFTok);
1003 ++NumActuals;
1004 --NumFixedArgsLeft;
1005 };
1006
1007 // Okay, we either found the r_paren. Check to see if we parsed too few
1008 // arguments.
1009 unsigned MinArgsExpected = MI->getNumArgs();
1010
1011 // See MacroArgs instance var for description of this.
1012 bool isVarargsElided = false;
1013
1014 if (NumActuals < MinArgsExpected) {
1015 // There are several cases where too few arguments is ok, handle them now.
1016 if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) {
1017 // Varargs where the named vararg parameter is missing: ok as extension.
1018 // #define A(x, ...)
1019 // A("blah")
1020 Diag(Tok, diag::ext_missing_varargs_arg);
1021
1022 // Remember this occurred if this is a C99 macro invocation with at least
1023 // one actual argument.
1024 isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1;
1025 } else if (MI->getNumArgs() == 1) {
1026 // #define A(x)
1027 // A()
1028 // is ok because it is an empty argument.
1029
1030 // Empty arguments are standard in C99 and supported as an extension in
1031 // other modes.
1032 if (ArgTokens.empty() && !Features.C99)
1033 Diag(Tok, diag::ext_empty_fnmacro_arg);
1034 } else {
1035 // Otherwise, emit the error.
1036 Diag(Tok, diag::err_too_few_args_in_macro_invoc);
1037 return 0;
1038 }
1039
1040 // Add a marker EOF token to the end of the token list for this argument.
1041 SourceLocation EndLoc = Tok.getLocation();
1042 Tok.startToken();
1043 Tok.setKind(tok::eof);
1044 Tok.setLocation(EndLoc);
1045 Tok.setLength(0);
1046 ArgTokens.push_back(Tok);
1047 }
1048
1049 return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided);
1050}
1051
1052/// ComputeDATE_TIME - Compute the current time, enter it into the specified
1053/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
1054/// the identifier tokens inserted.
1055static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
1056 Preprocessor &PP) {
1057 time_t TT = time(0);
1058 struct tm *TM = localtime(&TT);
1059
1060 static const char * const Months[] = {
1061 "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
1062 };
1063
1064 char TmpBuffer[100];
1065 sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
1066 TM->tm_year+1900);
1067 DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
1068
1069 sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
1070 TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
1071}
1072
1073/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
1074/// as a builtin macro, handle it and return the next token as 'Tok'.
1075void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
1076 // Figure out which token this is.
1077 IdentifierInfo *II = Tok.getIdentifierInfo();
1078 assert(II && "Can't be a macro without id info!");
1079
1080 // If this is an _Pragma directive, expand it, invoke the pragma handler, then
1081 // lex the token after it.
1082 if (II == Ident_Pragma)
1083 return Handle_Pragma(Tok);
1084
1085 ++NumBuiltinMacroExpanded;
1086
1087 char TmpBuffer[100];
1088
1089 // Set up the return result.
1090 Tok.setIdentifierInfo(0);
1091 Tok.clearFlag(Token::NeedsCleaning);
1092
1093 if (II == Ident__LINE__) {
1094 // __LINE__ expands to a simple numeric value.
1095 sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation()));
1096 unsigned Length = strlen(TmpBuffer);
1097 Tok.setKind(tok::numeric_constant);
1098 Tok.setLength(Length);
1099 Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
1100 } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
1101 SourceLocation Loc = Tok.getLocation();
1102 if (II == Ident__BASE_FILE__) {
1103 Diag(Tok, diag::ext_pp_base_file);
1104 SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc);
1105 while (NextLoc.isValid()) {
1106 Loc = NextLoc;
1107 NextLoc = SourceMgr.getIncludeLoc(Loc);
1108 }
1109 }
1110
1111 // Escape this filename. Turn '\' -> '\\' '"' -> '\"'
1112 std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc));
1113 FN = '"' + Lexer::Stringify(FN) + '"';
1114 Tok.setKind(tok::string_literal);
1115 Tok.setLength(FN.size());
1116 Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
1117 } else if (II == Ident__DATE__) {
1118 if (!DATELoc.isValid())
1119 ComputeDATE_TIME(DATELoc, TIMELoc, *this);
1120 Tok.setKind(tok::string_literal);
1121 Tok.setLength(strlen("\"Mmm dd yyyy\""));
1122 Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation()));
1123 } else if (II == Ident__TIME__) {
1124 if (!TIMELoc.isValid())
1125 ComputeDATE_TIME(DATELoc, TIMELoc, *this);
1126 Tok.setKind(tok::string_literal);
1127 Tok.setLength(strlen("\"hh:mm:ss\""));
1128 Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation()));
1129 } else if (II == Ident__INCLUDE_LEVEL__) {
1130 Diag(Tok, diag::ext_pp_include_level);
1131
1132 // Compute the include depth of this token.
1133 unsigned Depth = 0;
1134 SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation());
1135 for (; Loc.isValid(); ++Depth)
1136 Loc = SourceMgr.getIncludeLoc(Loc);
1137
1138 // __INCLUDE_LEVEL__ expands to a simple numeric value.
1139 sprintf(TmpBuffer, "%u", Depth);
1140 unsigned Length = strlen(TmpBuffer);
1141 Tok.setKind(tok::numeric_constant);
1142 Tok.setLength(Length);
1143 Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
1144 } else if (II == Ident__TIMESTAMP__) {
1145 // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
1146 // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
1147 Diag(Tok, diag::ext_pp_timestamp);
1148
1149 // Get the file that we are lexing out of. If we're currently lexing from
1150 // a macro, dig into the include stack.
1151 const FileEntry *CurFile = 0;
1152 Lexer *TheLexer = getCurrentFileLexer();
1153
1154 if (TheLexer)
1155 CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
1156
1157 // If this file is older than the file it depends on, emit a diagnostic.
1158 const char *Result;
1159 if (CurFile) {
1160 time_t TT = CurFile->getModificationTime();
1161 struct tm *TM = localtime(&TT);
1162 Result = asctime(TM);
1163 } else {
1164 Result = "??? ??? ?? ??:??:?? ????\n";
1165 }
1166 TmpBuffer[0] = '"';
1167 strcpy(TmpBuffer+1, Result);
1168 unsigned Len = strlen(TmpBuffer);
1169 TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
1170 Tok.setKind(tok::string_literal);
1171 Tok.setLength(Len);
1172 Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation()));
1173 } else {
1174 assert(0 && "Unknown identifier!");
Chris Lattnerc0f7c512007-12-09 20:31:55 +00001175 }
Chris Lattner4b009652007-07-25 00:24:17 +00001176}
1177
1178//===----------------------------------------------------------------------===//
1179// Lexer Event Handling.
1180//===----------------------------------------------------------------------===//
1181
1182/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
1183/// identifier information for the token and install it into the token.
1184IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
1185 const char *BufPtr) {
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001186 assert(Identifier.is(tok::identifier) && "Not an identifier!");
Chris Lattner4b009652007-07-25 00:24:17 +00001187 assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
1188
1189 // Look up this token, see if it is a macro, or if it is a language keyword.
1190 IdentifierInfo *II;
1191 if (BufPtr && !Identifier.needsCleaning()) {
1192 // No cleaning needed, just use the characters from the lexed buffer.
1193 II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength());
1194 } else {
1195 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
1196 llvm::SmallVector<char, 64> IdentifierBuffer;
1197 IdentifierBuffer.resize(Identifier.getLength());
1198 const char *TmpBuf = &IdentifierBuffer[0];
1199 unsigned Size = getSpelling(Identifier, TmpBuf);
1200 II = getIdentifierInfo(TmpBuf, TmpBuf+Size);
1201 }
1202 Identifier.setIdentifierInfo(II);
1203 return II;
1204}
1205
1206
1207/// HandleIdentifier - This callback is invoked when the lexer reads an
1208/// identifier. This callback looks up the identifier in the map and/or
1209/// potentially macro expands it or turns it into a named token (like 'for').
1210void Preprocessor::HandleIdentifier(Token &Identifier) {
1211 assert(Identifier.getIdentifierInfo() &&
1212 "Can't handle identifiers without identifier info!");
1213
1214 IdentifierInfo &II = *Identifier.getIdentifierInfo();
1215
1216 // If this identifier was poisoned, and if it was not produced from a macro
1217 // expansion, emit an error.
1218 if (II.isPoisoned() && CurLexer) {
1219 if (&II != Ident__VA_ARGS__) // We warn about __VA_ARGS__ with poisoning.
1220 Diag(Identifier, diag::err_pp_used_poisoned_id);
1221 else
1222 Diag(Identifier, diag::ext_pp_bad_vaargs_use);
1223 }
1224
1225 // If this is a macro to be expanded, do it.
Chris Lattner7a1b0882007-10-07 08:44:20 +00001226 if (MacroInfo *MI = getMacroInfo(&II)) {
Chris Lattner4b009652007-07-25 00:24:17 +00001227 if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) {
1228 if (MI->isEnabled()) {
1229 if (!HandleMacroExpandedIdentifier(Identifier, MI))
1230 return;
1231 } else {
1232 // C99 6.10.3.4p2 says that a disabled macro may never again be
1233 // expanded, even if it's in a context where it could be expanded in the
1234 // future.
1235 Identifier.setFlag(Token::DisableExpand);
1236 }
1237 }
Chris Lattner4b009652007-07-25 00:24:17 +00001238 }
1239
1240 // C++ 2.11p2: If this is an alternative representation of a C++ operator,
1241 // then we act as if it is the actual operator and not the textual
1242 // representation of it.
1243 if (II.isCPlusPlusOperatorKeyword())
1244 Identifier.setIdentifierInfo(0);
1245
1246 // Change the kind of this identifier to the appropriate token kind, e.g.
1247 // turning "for" into a keyword.
1248 Identifier.setKind(II.getTokenID());
1249
1250 // If this is an extension token, diagnose its use.
1251 // FIXME: tried (unsuccesfully) to shut this up when compiling with gnu99
1252 // For now, I'm just commenting it out (while I work on attributes).
1253 if (II.isExtensionToken() && Features.C99)
1254 Diag(Identifier, diag::ext_token_used);
1255}
1256
1257/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
1258/// the current file. This either returns the EOF token or pops a level off
1259/// the include stack and keeps going.
1260bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
1261 assert(!CurMacroExpander &&
1262 "Ending a file when currently in a macro!");
1263
1264 // See if this file had a controlling macro.
1265 if (CurLexer) { // Not ending a macro, ignore it.
1266 if (const IdentifierInfo *ControllingMacro =
1267 CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
1268 // Okay, this has a controlling macro, remember in PerFileInfo.
1269 if (const FileEntry *FE =
1270 SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
1271 HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
1272 }
1273 }
1274
1275 // If this is a #include'd file, pop it off the include stack and continue
1276 // lexing the #includer file.
1277 if (!IncludeMacroStack.empty()) {
1278 // We're done with the #included file.
1279 RemoveTopOfLexerStack();
1280
1281 // Notify the client, if desired, that we are in a new source file.
1282 if (Callbacks && !isEndOfMacro && CurLexer) {
1283 DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
1284
1285 // Get the file entry for the current file.
1286 if (const FileEntry *FE =
1287 SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
1288 FileType = HeaderInfo.getFileDirFlavor(FE);
1289
1290 Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr),
1291 PPCallbacks::ExitFile, FileType);
1292 }
1293
1294 // Client should lex another token.
1295 return false;
1296 }
Chris Lattner1d34a7c2008-01-25 00:00:30 +00001297
1298 // If the file ends with a newline, form the EOF token on the newline itself,
1299 // rather than "on the line following it", which doesn't exist. This makes
1300 // diagnostics relating to the end of file include the last file that the user
1301 // actually typed, which is goodness.
1302 const char *EndPos = CurLexer->BufferEnd;
1303 if (EndPos != CurLexer->BufferStart &&
1304 (EndPos[-1] == '\n' || EndPos[-1] == '\r')) {
1305 --EndPos;
1306
1307 // Handle \n\r and \r\n:
1308 if (EndPos != CurLexer->BufferStart &&
1309 (EndPos[-1] == '\n' || EndPos[-1] == '\r') &&
1310 EndPos[-1] != EndPos[0])
1311 --EndPos;
1312 }
Chris Lattner4b009652007-07-25 00:24:17 +00001313
1314 Result.startToken();
Chris Lattner1d34a7c2008-01-25 00:00:30 +00001315 CurLexer->BufferPtr = EndPos;
1316 CurLexer->FormTokenWithChars(Result, EndPos);
Chris Lattner4b009652007-07-25 00:24:17 +00001317 Result.setKind(tok::eof);
1318
1319 // We're done with the #included file.
1320 delete CurLexer;
1321 CurLexer = 0;
1322
1323 // This is the end of the top-level file. If the diag::pp_macro_not_used
Chris Lattner7a1b0882007-10-07 08:44:20 +00001324 // diagnostic is enabled, look for macros that have not been used.
Chris Lattner4b009652007-07-25 00:24:17 +00001325 if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){
Chris Lattner7a1b0882007-10-07 08:44:20 +00001326 for (llvm::DenseMap<IdentifierInfo*, MacroInfo*>::iterator I =
1327 Macros.begin(), E = Macros.end(); I != E; ++I) {
1328 if (!I->second->isUsed())
1329 Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used);
Chris Lattner4b009652007-07-25 00:24:17 +00001330 }
1331 }
Chris Lattner4b009652007-07-25 00:24:17 +00001332 return true;
1333}
1334
1335/// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
1336/// the current macro expansion or token stream expansion.
1337bool Preprocessor::HandleEndOfMacro(Token &Result) {
1338 assert(CurMacroExpander && !CurLexer &&
1339 "Ending a macro when currently in a #include file!");
1340
1341 // Delete or cache the now-dead macro expander.
1342 if (NumCachedMacroExpanders == MacroExpanderCacheSize)
1343 delete CurMacroExpander;
1344 else
1345 MacroExpanderCache[NumCachedMacroExpanders++] = CurMacroExpander;
1346
1347 // Handle this like a #include file being popped off the stack.
1348 CurMacroExpander = 0;
1349 return HandleEndOfFile(Result, true);
1350}
1351
Chris Lattner64b32ec2008-02-07 06:03:59 +00001352/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
1353/// comment (/##/) in microsoft mode, this method handles updating the current
1354/// state, returning the token on the next source line.
1355void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
1356 assert(CurMacroExpander && !CurLexer &&
1357 "Pasted comment can only be formed from macro");
1358
1359 // We handle this by scanning for the closest real lexer, switching it to
1360 // raw mode and preprocessor mode. This will cause it to return \n as an
1361 // explicit EOM token.
1362 Lexer *FoundLexer = 0;
1363 bool LexerWasInPPMode = false;
1364 for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
1365 IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1);
1366 if (ISI.TheLexer == 0) continue; // Scan for a real lexer.
1367
1368 // Once we find a real lexer, mark it as raw mode (disabling macro
1369 // expansions) and preprocessor mode (return EOM). We know that the lexer
1370 // was *not* in raw mode before, because the macro that the comment came
1371 // from was expanded. However, it could have already been in preprocessor
1372 // mode (#if COMMENT) in which case we have to return it to that mode and
1373 // return EOM.
1374 FoundLexer = ISI.TheLexer;
1375 FoundLexer->LexingRawMode = true;
1376 LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
1377 FoundLexer->ParsingPreprocessorDirective = true;
1378 break;
1379 }
1380
1381 // Okay, we either found and switched over the lexer, or we didn't find a
1382 // lexer. In either case, finish off the macro the comment came from, getting
1383 // the next token.
1384 if (!HandleEndOfMacro(Tok)) Lex(Tok);
1385
1386 // Discarding comments as long as we don't have EOF or EOM. This 'comments
1387 // out' the rest of the line, including any tokens that came from other macros
1388 // that were active, as in:
1389 // #define submacro a COMMENT b
1390 // submacro c
1391 // which should lex to 'a' only: 'b' and 'c' should be removed.
1392 while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof))
1393 Lex(Tok);
1394
1395 // If we got an eom token, then we successfully found the end of the line.
1396 if (Tok.is(tok::eom)) {
1397 assert(FoundLexer && "Can't get end of line without an active lexer");
1398 // Restore the lexer back to normal mode instead of raw mode.
1399 FoundLexer->LexingRawMode = false;
1400
1401 // If the lexer was already in preprocessor mode, just return the EOM token
1402 // to finish the preprocessor line.
1403 if (LexerWasInPPMode) return;
1404
1405 // Otherwise, switch out of PP mode and return the next lexed token.
1406 FoundLexer->ParsingPreprocessorDirective = false;
1407 return Lex(Tok);
1408 }
1409
1410 // If we got an EOF token, then we reached the end of the token stream but
1411 // didn't find an explicit \n. This can only happen if there was no lexer
1412 // active (an active lexer would return EOM at EOF if there was no \n in
1413 // preprocessor directive mode), so just return EOF as our token.
1414 assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode");
1415 return;
1416}
Chris Lattner4b009652007-07-25 00:24:17 +00001417
1418//===----------------------------------------------------------------------===//
1419// Utility Methods for Preprocessor Directive Handling.
1420//===----------------------------------------------------------------------===//
1421
1422/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
1423/// current line until the tok::eom token is found.
1424void Preprocessor::DiscardUntilEndOfDirective() {
1425 Token Tmp;
1426 do {
1427 LexUnexpandedToken(Tmp);
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001428 } while (Tmp.isNot(tok::eom));
Chris Lattner4b009652007-07-25 00:24:17 +00001429}
1430
/// isCXXNamedOperator - Returns "true" if Spelling is one of the C++
/// alternative operator tokens ("and", "bitor", "not_eq", ...).  The match is
/// exact and case sensitive.
static bool isCXXNamedOperator(const std::string &Spelling) {
  // All eleven alternative tokens, including the compound-assignment forms
  // ("and_eq", "or_eq", "xor_eq").
  static const char * const NamedOperators[] = {
    "and", "and_eq", "bitand", "bitor", "compl", "not",
    "not_eq", "or", "or_eq", "xor", "xor_eq"
  };
  const unsigned NumNamedOperators =
    sizeof(NamedOperators) / sizeof(NamedOperators[0]);

  for (unsigned i = 0; i != NumNamedOperators; ++i)
    if (Spelling == NamedOperators[i])
      return true;
  return false;
}
1437
1438/// ReadMacroName - Lex and validate a macro name, which occurs after a
1439/// #define or #undef. This sets the token kind to eom and discards the rest
1440/// of the macro line if the macro name is invalid. isDefineUndef is 1 if
1441/// this is due to a a #define, 2 if #undef directive, 0 if it is something
1442/// else (e.g. #ifdef).
1443void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) {
1444 // Read the token, don't allow macro expansion on it.
1445 LexUnexpandedToken(MacroNameTok);
1446
1447 // Missing macro name?
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001448 if (MacroNameTok.is(tok::eom))
Chris Lattner4b009652007-07-25 00:24:17 +00001449 return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
1450
1451 IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1452 if (II == 0) {
1453 std::string Spelling = getSpelling(MacroNameTok);
1454 if (isCXXNamedOperator(Spelling))
1455 // C++ 2.5p2: Alternative tokens behave the same as its primary token
1456 // except for their spellings.
1457 Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name, Spelling);
1458 else
1459 Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
1460 // Fall through on error.
1461 } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) {
1462 // Error if defining "defined": C99 6.10.8.4.
1463 Diag(MacroNameTok, diag::err_defined_macro_name);
Chris Lattner3b56a012007-10-07 08:04:56 +00001464 } else if (isDefineUndef && II->hasMacroDefinition() &&
Chris Lattner7a1b0882007-10-07 08:44:20 +00001465 getMacroInfo(II)->isBuiltinMacro()) {
Chris Lattner4b009652007-07-25 00:24:17 +00001466 // Error if defining "__LINE__" and other builtins: C99 6.10.8.4.
1467 if (isDefineUndef == 1)
1468 Diag(MacroNameTok, diag::pp_redef_builtin_macro);
1469 else
1470 Diag(MacroNameTok, diag::pp_undef_builtin_macro);
1471 } else {
1472 // Okay, we got a good identifier node. Return it.
1473 return;
1474 }
1475
1476 // Invalid macro name, read and discard the rest of the line. Then set the
1477 // token kind to tok::eom.
1478 MacroNameTok.setKind(tok::eom);
1479 return DiscardUntilEndOfDirective();
1480}
1481
1482/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If
1483/// not, emit a diagnostic and consume up until the eom.
1484void Preprocessor::CheckEndOfDirective(const char *DirType) {
1485 Token Tmp;
Chris Lattner2d2216b2008-02-16 01:20:36 +00001486 // Lex unexpanded tokens: macros might expand to zero tokens, causing us to
1487 // miss diagnosing invalid lines.
1488 LexUnexpandedToken(Tmp);
1489
Chris Lattner4b009652007-07-25 00:24:17 +00001490 // There should be no tokens after the directive, but we allow them as an
1491 // extension.
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001492 while (Tmp.is(tok::comment)) // Skip comments in -C mode.
Chris Lattner2d2216b2008-02-16 01:20:36 +00001493 LexUnexpandedToken(Tmp);
Chris Lattner4b009652007-07-25 00:24:17 +00001494
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001495 if (Tmp.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +00001496 Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType);
1497 DiscardUntilEndOfDirective();
1498 }
1499}
1500
1501
1502
/// SkipExcludedConditionalBlock - We just read a #if or related directive and
/// decided that the subsequent tokens are in the #if'd out portion of the
/// file.  Lex the rest of the file in raw mode until we reach the matching
/// #endif, a #else/#elif that should be entered, or the end of the buffer.
///
/// IfTokenLoc is recorded as the location of the conditional being skipped.
/// If FoundNonSkipPortion is true, then we have already emitted code for part
/// of this #if directive, so #else/#elif blocks should never be entered.  If
/// FoundElse is true, then a #else has already been seen for this conditional,
/// so another #else (or a #elif) is diagnosed.  When this returns, raw mode
/// has been turned off again and the caller can lex the first valid token.
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
                                                bool FoundNonSkipPortion,
                                                bool FoundElse) {
  ++NumSkipped;
  assert(CurMacroExpander == 0 && CurLexer &&
         "Lexing a macro, not a file?");

  // Record the conditional we are skipping.  Nested conditionals found below
  // are pushed with wasskipping=true so that we can tell them apart from this
  // outermost one when they are popped.
  CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false,
                                 FoundNonSkipPortion, FoundElse);

  // Enter raw mode to disable identifier lookup (and thus macro expansion),
  // disabling warnings, etc.
  CurLexer->LexingRawMode = true;
  Token Tok;
  while (1) {
    CurLexer->Lex(Tok);

    // If this is the end of the buffer, we have an error.
    if (Tok.is(tok::eof)) {
      // Emit errors for each unterminated conditional on the stack, including
      // the current one.
      while (!CurLexer->ConditionalStack.empty()) {
        Diag(CurLexer->ConditionalStack.back().IfLoc,
             diag::err_pp_unterminated_conditional);
        CurLexer->ConditionalStack.pop_back();
      }

      // Just return and let the caller lex after this #include.
      break;
    }

    // If this token is not a preprocessor directive, just skip it.
    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
      continue;

    // We just parsed a # character at the start of a line, so we're in
    // directive mode.  Tell the lexer this so any newlines we see will be
    // converted into an EOM token (this terminates the directive).  Comments
    // are not kept while looking at the directive.
    CurLexer->ParsingPreprocessorDirective = true;
    CurLexer->KeepCommentMode = false;


    // Read the next token, the directive flavor.
    LexUnexpandedToken(Tok);

    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
    // something bogus), skip it.
    if (Tok.isNot(tok::identifier)) {
      CurLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      CurLexer->KeepCommentMode = KeepComments;
      continue;
    }

    // If the first letter isn't i or e, it isn't interesting to us.  We know
    // that this is safe in the face of spelling differences, because there is
    // no way to spell an i/e in a strange way that is another letter.  Skipping
    // this allows us to avoid looking up the identifier info for #define/#undef
    // and other common directives.
    const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation());
    char FirstChar = RawCharData[0];
    if (FirstChar >= 'a' && FirstChar <= 'z' &&
        FirstChar != 'i' && FirstChar != 'e') {
      CurLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
      CurLexer->KeepCommentMode = KeepComments;
      continue;
    }

    // Get the identifier name without trigraphs or embedded newlines.  Note
    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
    // when skipping.
    // TODO: could do this with zero copies in the no-clean case by using
    // strncmp below.
    char Directive[20];
    unsigned IdLen;
    if (!Tok.needsCleaning() && Tok.getLength() < 20) {
      // Fast path: the raw characters are the spelling, and they fit (with the
      // terminating nul) in the local buffer.
      IdLen = Tok.getLength();
      memcpy(Directive, RawCharData, IdLen);
      Directive[IdLen] = 0;
    } else {
      // Slow path: compute the cleaned spelling.  Anything that still does not
      // fit in the buffer cannot be one of the directives handled below.
      std::string DirectiveStr = getSpelling(Tok);
      IdLen = DirectiveStr.size();
      if (IdLen >= 20) {
        CurLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
        CurLexer->KeepCommentMode = KeepComments;
        continue;
      }
      memcpy(Directive, &DirectiveStr[0], IdLen);
      Directive[IdLen] = 0;
    }

    if (FirstChar == 'i' && Directive[1] == 'f') {
      if ((IdLen == 2) ||   // "if"
          (IdLen == 5 && !strcmp(Directive+2, "def")) ||   // "ifdef"
          (IdLen == 6 && !strcmp(Directive+2, "ndef"))) {  // "ifndef"
        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
        // bother parsing the condition.  Just track the nesting level.
        DiscardUntilEndOfDirective();
        CurLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
                                       /*foundnonskip*/false,
                                       /*foundelse*/false);
      }
    } else if (FirstChar == 'e') {
      if (IdLen == 5 && !strcmp(Directive+1, "ndif")) {  // "endif"
        CheckEndOfDirective("#endif");
        PPConditionalInfo CondInfo;
        CondInfo.WasSkipping = true; // Silence bogus warning.
        bool InCond = CurLexer->popConditionalLevel(CondInfo);
        InCond = InCond;  // Silence warning in no-asserts mode.
        assert(!InCond && "Can't be skipping if not in a conditional!");

        // If we popped the outermost skipping block, we're done skipping!
        if (!CondInfo.WasSkipping)
          break;
      } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else".
        // #else directive in a skipping conditional.  If not in some other
        // skipping conditional, and if #else hasn't already been seen, enter it
        // as a non-skipping conditional.
        CheckEndOfDirective("#else");
        PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();

        // If this is a #else with a #else before it, report the error.
        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else);

        // Note that we've seen a #else in this conditional.
        CondInfo.FoundElse = true;

        // If the conditional is at the top level, and the #if block wasn't
        // entered, enter the #else block now.
        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
          CondInfo.FoundNonSkip = true;
          break;
        }
      } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) {  // "elif".
        PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();

        bool ShouldEnter;
        // If this is in a skipping block or if we've already handled this #if
        // block, don't bother parsing the condition.
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
          DiscardUntilEndOfDirective();
          ShouldEnter = false;
        } else {
          // Temporarily leave raw mode so that identifiers are looked up, etc,
          // inside the #elif expression, then go back to raw mode.
          assert(CurLexer->LexingRawMode && "We have to be skipping here!");
          CurLexer->LexingRawMode = false;
          IdentifierInfo *IfNDefMacro = 0;
          ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro);
          CurLexer->LexingRawMode = true;
        }

        // If this is a #elif with a #else before it, report the error.
        if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else);

        // If this condition is true, enter it!
        if (ShouldEnter) {
          CondInfo.FoundNonSkip = true;
          break;
        }
      }
    }

    // Done with this directive; keep skipping.
    CurLexer->ParsingPreprocessorDirective = false;
    // Restore comment saving mode.
    CurLexer->KeepCommentMode = KeepComments;
  }

  // Finally, if we are out of the conditional (saw an #endif or ran off the end
  // of the file), just stop skipping and return to lexing whatever came after
  // the #if block.
  CurLexer->LexingRawMode = false;
}
1686
1687//===----------------------------------------------------------------------===//
1688// Preprocessor Directive Handling.
1689//===----------------------------------------------------------------------===//
1690
1691/// HandleDirective - This callback is invoked when the lexer sees a # token
1692/// at the start of a line. This consumes the directive, modifies the
1693/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1694/// read is the correct one.
1695void Preprocessor::HandleDirective(Token &Result) {
1696 // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1697
1698 // We just parsed a # character at the start of a line, so we're in directive
1699 // mode. Tell the lexer this so any newlines we see will be converted into an
1700 // EOM token (which terminates the directive).
1701 CurLexer->ParsingPreprocessorDirective = true;
1702
1703 ++NumDirectives;
1704
1705 // We are about to read a token. For the multiple-include optimization FA to
1706 // work, we have to remember if we had read any tokens *before* this
1707 // pp-directive.
1708 bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal();
1709
1710 // Read the next token, the directive flavor. This isn't expanded due to
1711 // C99 6.10.3p8.
1712 LexUnexpandedToken(Result);
1713
1714 // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:
1715 // #define A(x) #x
1716 // A(abc
1717 // #warning blah
1718 // def)
1719 // If so, the user is relying on non-portable behavior, emit a diagnostic.
1720 if (InMacroArgs)
1721 Diag(Result, diag::ext_embedded_directive);
1722
1723TryAgain:
1724 switch (Result.getKind()) {
1725 case tok::eom:
1726 return; // null directive.
1727 case tok::comment:
1728 // Handle stuff like "# /*foo*/ define X" in -E -C mode.
1729 LexUnexpandedToken(Result);
1730 goto TryAgain;
1731
1732 case tok::numeric_constant:
1733 // FIXME: implement # 7 line numbers!
1734 DiscardUntilEndOfDirective();
1735 return;
1736 default:
1737 IdentifierInfo *II = Result.getIdentifierInfo();
1738 if (II == 0) break; // Not an identifier.
1739
1740 // Ask what the preprocessor keyword ID is.
1741 switch (II->getPPKeywordID()) {
1742 default: break;
1743 // C99 6.10.1 - Conditional Inclusion.
1744 case tok::pp_if:
1745 return HandleIfDirective(Result, ReadAnyTokensBeforeDirective);
1746 case tok::pp_ifdef:
1747 return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
1748 case tok::pp_ifndef:
1749 return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
1750 case tok::pp_elif:
1751 return HandleElifDirective(Result);
1752 case tok::pp_else:
1753 return HandleElseDirective(Result);
1754 case tok::pp_endif:
1755 return HandleEndifDirective(Result);
1756
1757 // C99 6.10.2 - Source File Inclusion.
1758 case tok::pp_include:
1759 return HandleIncludeDirective(Result); // Handle #include.
1760
1761 // C99 6.10.3 - Macro Replacement.
1762 case tok::pp_define:
Chris Lattnerfc457002008-03-05 01:18:20 +00001763 return HandleDefineDirective(Result);
Chris Lattner4b009652007-07-25 00:24:17 +00001764 case tok::pp_undef:
1765 return HandleUndefDirective(Result);
1766
1767 // C99 6.10.4 - Line Control.
1768 case tok::pp_line:
1769 // FIXME: implement #line
1770 DiscardUntilEndOfDirective();
1771 return;
1772
1773 // C99 6.10.5 - Error Directive.
1774 case tok::pp_error:
1775 return HandleUserDiagnosticDirective(Result, false);
1776
1777 // C99 6.10.6 - Pragma Directive.
1778 case tok::pp_pragma:
1779 return HandlePragmaDirective();
1780
1781 // GNU Extensions.
1782 case tok::pp_import:
1783 return HandleImportDirective(Result);
1784 case tok::pp_include_next:
1785 return HandleIncludeNextDirective(Result);
1786
1787 case tok::pp_warning:
1788 Diag(Result, diag::ext_pp_warning_directive);
1789 return HandleUserDiagnosticDirective(Result, true);
1790 case tok::pp_ident:
1791 return HandleIdentSCCSDirective(Result);
1792 case tok::pp_sccs:
1793 return HandleIdentSCCSDirective(Result);
1794 case tok::pp_assert:
1795 //isExtension = true; // FIXME: implement #assert
1796 break;
1797 case tok::pp_unassert:
1798 //isExtension = true; // FIXME: implement #unassert
1799 break;
Chris Lattner4b009652007-07-25 00:24:17 +00001800 }
1801 break;
1802 }
1803
1804 // If we reached here, the preprocessing token is not valid!
1805 Diag(Result, diag::err_pp_invalid_directive);
1806
1807 // Read the rest of the PP line.
1808 DiscardUntilEndOfDirective();
1809
1810 // Okay, we're done parsing the directive.
1811}
1812
1813void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1814 bool isWarning) {
1815 // Read the rest of the line raw. We do this because we don't want macros
1816 // to be expanded and we don't require that the tokens be valid preprocessing
1817 // tokens. For example, this is allowed: "#warning ` 'foo". GCC does
1818 // collapse multiple consequtive white space between tokens, but this isn't
1819 // specified by the standard.
1820 std::string Message = CurLexer->ReadToEndOfLine();
1821
1822 unsigned DiagID = isWarning ? diag::pp_hash_warning : diag::err_pp_hash_error;
1823 return Diag(Tok, DiagID, Message);
1824}
1825
1826/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1827///
1828void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1829 // Yes, this directive is an extension.
1830 Diag(Tok, diag::ext_pp_ident_directive);
1831
1832 // Read the string argument.
1833 Token StrTok;
1834 Lex(StrTok);
1835
1836 // If the token kind isn't a string, it's a malformed directive.
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001837 if (StrTok.isNot(tok::string_literal) &&
1838 StrTok.isNot(tok::wide_string_literal))
Chris Lattner4b009652007-07-25 00:24:17 +00001839 return Diag(StrTok, diag::err_pp_malformed_ident);
1840
1841 // Verify that there is nothing after the string, other than EOM.
1842 CheckEndOfDirective("#ident");
1843
1844 if (Callbacks)
1845 Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok));
1846}
1847
1848//===----------------------------------------------------------------------===//
1849// Preprocessor Include Directive Handling.
1850//===----------------------------------------------------------------------===//
1851
1852/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1853/// checked and spelled filename, e.g. as an operand of #include. This returns
1854/// true if the input filename was in <>'s or false if it were in ""'s. The
1855/// caller is expected to provide a buffer that is large enough to hold the
1856/// spelling of the filename, but is also expected to handle the case when
1857/// this method decides to use a different buffer.
1858bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1859 const char *&BufStart,
1860 const char *&BufEnd) {
1861 // Get the text form of the filename.
1862 assert(BufStart != BufEnd && "Can't have tokens with empty spellings!");
1863
1864 // Make sure the filename is <x> or "x".
1865 bool isAngled;
1866 if (BufStart[0] == '<') {
1867 if (BufEnd[-1] != '>') {
1868 Diag(Loc, diag::err_pp_expects_filename);
1869 BufStart = 0;
1870 return true;
1871 }
1872 isAngled = true;
1873 } else if (BufStart[0] == '"') {
1874 if (BufEnd[-1] != '"') {
1875 Diag(Loc, diag::err_pp_expects_filename);
1876 BufStart = 0;
1877 return true;
1878 }
1879 isAngled = false;
1880 } else {
1881 Diag(Loc, diag::err_pp_expects_filename);
1882 BufStart = 0;
1883 return true;
1884 }
1885
1886 // Diagnose #include "" as invalid.
1887 if (BufEnd-BufStart <= 2) {
1888 Diag(Loc, diag::err_pp_empty_filename);
1889 BufStart = 0;
1890 return "";
1891 }
1892
1893 // Skip the brackets.
1894 ++BufStart;
1895 --BufEnd;
1896 return isAngled;
1897}
1898
1899/// ConcatenateIncludeName - Handle cases where the #include name is expanded
1900/// from a macro as multiple tokens, which need to be glued together. This
1901/// occurs for code like:
1902/// #define FOO <a/b.h>
1903/// #include FOO
1904/// because in this case, "<a/b.h>" is returned as 7 tokens, not one.
1905///
1906/// This code concatenates and consumes tokens up to the '>' token. It returns
1907/// false if the > was found, otherwise it returns true if it finds and consumes
1908/// the EOM marker.
1909static bool ConcatenateIncludeName(llvm::SmallVector<char, 128> &FilenameBuffer,
1910 Preprocessor &PP) {
1911 Token CurTok;
1912
1913 PP.Lex(CurTok);
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001914 while (CurTok.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +00001915 // Append the spelling of this token to the buffer. If there was a space
1916 // before it, add it now.
1917 if (CurTok.hasLeadingSpace())
1918 FilenameBuffer.push_back(' ');
1919
1920 // Get the spelling of the token, directly into FilenameBuffer if possible.
1921 unsigned PreAppendSize = FilenameBuffer.size();
1922 FilenameBuffer.resize(PreAppendSize+CurTok.getLength());
1923
1924 const char *BufPtr = &FilenameBuffer[PreAppendSize];
1925 unsigned ActualLen = PP.getSpelling(CurTok, BufPtr);
1926
1927 // If the token was spelled somewhere else, copy it into FilenameBuffer.
1928 if (BufPtr != &FilenameBuffer[PreAppendSize])
1929 memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1930
1931 // Resize FilenameBuffer to the correct size.
1932 if (CurTok.getLength() != ActualLen)
1933 FilenameBuffer.resize(PreAppendSize+ActualLen);
1934
1935 // If we found the '>' marker, return success.
Chris Lattnercb8e41c2007-10-09 18:02:16 +00001936 if (CurTok.is(tok::greater))
Chris Lattner4b009652007-07-25 00:24:17 +00001937 return false;
1938
1939 PP.Lex(CurTok);
1940 }
1941
1942 // If we hit the eom marker, emit an error and return true so that the caller
1943 // knows the EOM has been read.
1944 PP.Diag(CurTok.getLocation(), diag::err_pp_expects_filename);
1945 return true;
1946}
1947
1948/// HandleIncludeDirective - The "#include" tokens have just been read, read the
1949/// file to be included from the lexer, then include it! This is a common
1950/// routine with functionality shared between #include, #include_next and
1951/// #import.
1952void Preprocessor::HandleIncludeDirective(Token &IncludeTok,
1953 const DirectoryLookup *LookupFrom,
1954 bool isImport) {
1955
1956 Token FilenameTok;
1957 CurLexer->LexIncludeFilename(FilenameTok);
1958
1959 // Reserve a buffer to get the spelling.
1960 llvm::SmallVector<char, 128> FilenameBuffer;
1961 const char *FilenameStart, *FilenameEnd;
1962
1963 switch (FilenameTok.getKind()) {
1964 case tok::eom:
1965 // If the token kind is EOM, the error has already been diagnosed.
1966 return;
1967
1968 case tok::angle_string_literal:
1969 case tok::string_literal: {
1970 FilenameBuffer.resize(FilenameTok.getLength());
1971 FilenameStart = &FilenameBuffer[0];
1972 unsigned Len = getSpelling(FilenameTok, FilenameStart);
1973 FilenameEnd = FilenameStart+Len;
1974 break;
1975 }
1976
1977 case tok::less:
1978 // This could be a <foo/bar.h> file coming from a macro expansion. In this
1979 // case, glue the tokens together into FilenameBuffer and interpret those.
1980 FilenameBuffer.push_back('<');
1981 if (ConcatenateIncludeName(FilenameBuffer, *this))
1982 return; // Found <eom> but no ">"? Diagnostic already emitted.
1983 FilenameStart = &FilenameBuffer[0];
1984 FilenameEnd = &FilenameBuffer[FilenameBuffer.size()];
1985 break;
1986 default:
1987 Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
1988 DiscardUntilEndOfDirective();
1989 return;
1990 }
1991
1992 bool isAngled = GetIncludeFilenameSpelling(FilenameTok.getLocation(),
1993 FilenameStart, FilenameEnd);
1994 // If GetIncludeFilenameSpelling set the start ptr to null, there was an
1995 // error.
1996 if (FilenameStart == 0) {
1997 DiscardUntilEndOfDirective();
1998 return;
1999 }
2000
2001 // Verify that there is nothing after the filename, other than EOM. Use the
2002 // preprocessor to lex this in case lexing the filename entered a macro.
2003 CheckEndOfDirective("#include");
2004
2005 // Check that we don't have infinite #include recursion.
2006 if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1)
2007 return Diag(FilenameTok, diag::err_pp_include_too_deep);
2008
2009 // Search include directories.
2010 const DirectoryLookup *CurDir;
2011 const FileEntry *File = LookupFile(FilenameStart, FilenameEnd,
2012 isAngled, LookupFrom, CurDir);
2013 if (File == 0)
2014 return Diag(FilenameTok, diag::err_pp_file_not_found,
2015 std::string(FilenameStart, FilenameEnd));
2016
2017 // Ask HeaderInfo if we should enter this #include file.
2018 if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) {
2019 // If it returns true, #including this file will have no effect.
2020 return;
2021 }
2022
2023 // Look up the file, create a File ID for it.
2024 unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation());
2025 if (FileID == 0)
2026 return Diag(FilenameTok, diag::err_pp_file_not_found,
2027 std::string(FilenameStart, FilenameEnd));
2028
2029 // Finally, if all is good, enter the new file!
2030 EnterSourceFile(FileID, CurDir);
2031}
2032
2033/// HandleIncludeNextDirective - Implements #include_next.
2034///
2035void Preprocessor::HandleIncludeNextDirective(Token &IncludeNextTok) {
2036 Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2037
2038 // #include_next is like #include, except that we start searching after
2039 // the current found directory. If we can't do this, issue a
2040 // diagnostic.
2041 const DirectoryLookup *Lookup = CurDirLookup;
2042 if (isInPrimaryFile()) {
2043 Lookup = 0;
2044 Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2045 } else if (Lookup == 0) {
2046 Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2047 } else {
2048 // Start looking up in the next directory.
2049 ++Lookup;
2050 }
2051
2052 return HandleIncludeDirective(IncludeNextTok, Lookup);
2053}
2054
2055/// HandleImportDirective - Implements #import.
2056///
2057void Preprocessor::HandleImportDirective(Token &ImportTok) {
2058 Diag(ImportTok, diag::ext_pp_import_directive);
2059
2060 return HandleIncludeDirective(ImportTok, 0, true);
2061}
2062
2063//===----------------------------------------------------------------------===//
2064// Preprocessor Macro Directive Handling.
2065//===----------------------------------------------------------------------===//
2066
2067/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro
2068/// definition has just been read. Lex the rest of the arguments and the
2069/// closing ), updating MI with what we learn. Return true if an error occurs
2070/// parsing the arg list.
2071bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) {
2072 llvm::SmallVector<IdentifierInfo*, 32> Arguments;
2073
2074 Token Tok;
2075 while (1) {
2076 LexUnexpandedToken(Tok);
2077 switch (Tok.getKind()) {
2078 case tok::r_paren:
2079 // Found the end of the argument list.
2080 if (Arguments.empty()) { // #define FOO()
2081 MI->setArgumentList(Arguments.begin(), Arguments.end());
2082 return false;
2083 }
2084 // Otherwise we have #define FOO(A,)
2085 Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2086 return true;
2087 case tok::ellipsis: // #define X(... -> C99 varargs
2088 // Warn if use of C99 feature in non-C99 mode.
2089 if (!Features.C99) Diag(Tok, diag::ext_variadic_macro);
2090
2091 // Lex the token after the identifier.
2092 LexUnexpandedToken(Tok);
Chris Lattnercb8e41c2007-10-09 18:02:16 +00002093 if (Tok.isNot(tok::r_paren)) {
Chris Lattner4b009652007-07-25 00:24:17 +00002094 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2095 return true;
2096 }
2097 // Add the __VA_ARGS__ identifier as an argument.
2098 Arguments.push_back(Ident__VA_ARGS__);
2099 MI->setIsC99Varargs();
2100 MI->setArgumentList(Arguments.begin(), Arguments.end());
2101 return false;
2102 case tok::eom: // #define X(
2103 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2104 return true;
2105 default:
2106 // Handle keywords and identifiers here to accept things like
2107 // #define Foo(for) for.
2108 IdentifierInfo *II = Tok.getIdentifierInfo();
2109 if (II == 0) {
2110 // #define X(1
2111 Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2112 return true;
2113 }
2114
2115 // If this is already used as an argument, it is used multiple times (e.g.
2116 // #define X(A,A.
2117 if (std::find(Arguments.begin(), Arguments.end(), II) !=
2118 Arguments.end()) { // C99 6.10.3p6
2119 Diag(Tok, diag::err_pp_duplicate_name_in_arg_list, II->getName());
2120 return true;
2121 }
2122
2123 // Add the argument to the macro info.
2124 Arguments.push_back(II);
2125
2126 // Lex the token after the identifier.
2127 LexUnexpandedToken(Tok);
2128
2129 switch (Tok.getKind()) {
2130 default: // #define X(A B
2131 Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2132 return true;
2133 case tok::r_paren: // #define X(A)
2134 MI->setArgumentList(Arguments.begin(), Arguments.end());
2135 return false;
2136 case tok::comma: // #define X(A,
2137 break;
2138 case tok::ellipsis: // #define X(A... -> GCC extension
2139 // Diagnose extension.
2140 Diag(Tok, diag::ext_named_variadic_macro);
2141
2142 // Lex the token after the identifier.
2143 LexUnexpandedToken(Tok);
Chris Lattnercb8e41c2007-10-09 18:02:16 +00002144 if (Tok.isNot(tok::r_paren)) {
Chris Lattner4b009652007-07-25 00:24:17 +00002145 Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2146 return true;
2147 }
2148
2149 MI->setIsGNUVarargs();
2150 MI->setArgumentList(Arguments.begin(), Arguments.end());
2151 return false;
2152 }
2153 }
2154 }
2155}
2156
/// HandleDefineDirective - Implements #define.  This consumes the entire macro
/// line then lets the caller lex the next real token.  The macro name is read
/// and validated, the optional parameter list and the replacement list are
/// collected into a new MacroInfo, and that MacroInfo is installed for the
/// name, replacing (and deleting) any previous definition.  On any error the
/// new MacroInfo is deleted and nothing is installed.
void Preprocessor::HandleDefineDirective(Token &DefineTok) {
  ++NumDefined;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok, 1);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eom))
    return;

  // If we are supposed to keep comments in #defines, reenable comment saving
  // mode.
  CurLexer->KeepCommentMode = KeepMacroComments;

  // Create the new macro.  It is owned by this function until it is handed to
  // setMacroInfo at the end, so every early return below must delete it.
  MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation());

  Token Tok;
  LexUnexpandedToken(Tok);

  // If this is a function-like macro definition, parse the argument list,
  // marking each of the identifiers as being used as macro arguments.  Also,
  // check other constraints on the first token of the macro body.
  if (Tok.is(tok::eom)) {
    // If there is no body to this macro, we have no special handling here.
  } else if (Tok.is(tok::l_paren) && !Tok.hasLeadingSpace()) {
    // This is a function-like macro definition.  Read the argument list.
    MI->setIsFunctionLike();
    if (ReadMacroDefinitionArgList(MI)) {
      // Forget about MI.
      delete MI;
      // Throw away the rest of the line.
      if (CurLexer->ParsingPreprocessorDirective)
        DiscardUntilEndOfDirective();
      return;
    }

    // Read the first token after the arg list for down below.
    LexUnexpandedToken(Tok);
  } else if (!Tok.hasLeadingSpace()) {
    // C99 requires whitespace between the macro definition and the body.  Emit
    // a diagnostic for something like "#define X+".
    if (Features.C99) {
      Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
    } else {
      // FIXME: C90/C++ do not get this diagnostic, but it does get a similar
      // one in some cases!
    }
  } else {
    // This is a normal token with leading space.  Clear the leading space
    // marker on the first token to get proper expansion.
    Tok.clearFlag(Token::LeadingSpace);
  }

  // __VA_ARGS__ is kept "poisoned" because it can only appear in the expansion
  // of a macro.  If this is a definition of a variadic C99 function-like macro
  // (not using the GNU named varargs extension), enable __VA_ARGS__ while the
  // body is read.  It is re-poisoned on every path out of the body reader.
  assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!");
  if (MI->isC99Varargs())
    Ident__VA_ARGS__->setIsPoisoned(false);

  // Read the rest of the macro body.
  if (MI->isObjectLike()) {
    // Object-like macros are very simple, just read their body.
    while (Tok.isNot(tok::eom)) {
      MI->AddTokenToBody(Tok);
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }

  } else {
    // Otherwise, read the body of a function-like macro.  This has to validate
    // the # (stringize) operator.
    while (Tok.isNot(tok::eom)) {
      MI->AddTokenToBody(Tok);

      // Check C99 6.10.3.2p1: ensure that # operators are followed by macro
      // parameters in function-like macro expansions.
      if (Tok.isNot(tok::hash)) {
        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      // Get the next token of the macro.
      LexUnexpandedToken(Tok);

      // Not a macro arg identifier?
      // NOTE(review): unlike the argument-list failure above, this path
      // returns without discarding the rest of the directive line when Tok is
      // not the eom -- confirm that this is intended.
      if (!Tok.getIdentifierInfo() ||
          MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) {
        Diag(Tok, diag::err_pp_stringize_not_parameter);
        delete MI;

        // Disable __VA_ARGS__ again.
        Ident__VA_ARGS__->setIsPoisoned(true);
        return;
      }

      // Things look ok, add the param name token to the macro.
      MI->AddTokenToBody(Tok);

      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }
  }


  // Disable __VA_ARGS__ again.
  Ident__VA_ARGS__->setIsPoisoned(true);

  // Check that there is no paste (##) operator at the beginning or end of the
  // replacement list.
  unsigned NumTokens = MI->getNumTokens();
  if (NumTokens != 0) {
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
      delete MI;
      return;
    }
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
      delete MI;
      return;
    }
  }

  // If this is the primary source file, remember that this macro hasn't been
  // used yet.
  if (isInPrimaryFile())
    MI->setIsUsed(false);

  // Finally, if this identifier already had a macro defined for it, verify that
  // the macro bodies are identical and free the old definition.
  if (MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo())) {
    // The old definition is going away; report it if it was never used.
    if (!OtherMI->isUsed())
      Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

    // Macros must be identical.  This means all tokens and whitespace
    // separation must be the same.  C99 6.10.3.2.
    if (!MI->isIdenticalTo(*OtherMI, *this)) {
      Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef,
           MacroNameTok.getIdentifierInfo()->getName());
      Diag(OtherMI->getDefinitionLoc(), diag::ext_pp_macro_redef2);
    }
    delete OtherMI;
  }

  // Install the new definition; ownership of MI passes on here.
  setMacroInfo(MacroNameTok.getIdentifierInfo(), MI);
}
2310
Chris Lattner4b009652007-07-25 00:24:17 +00002311/// HandleUndefDirective - Implements #undef.
2312///
2313void Preprocessor::HandleUndefDirective(Token &UndefTok) {
2314 ++NumUndefined;
2315
2316 Token MacroNameTok;
2317 ReadMacroName(MacroNameTok, 2);
2318
2319 // Error reading macro name? If so, diagnostic already issued.
Chris Lattnercb8e41c2007-10-09 18:02:16 +00002320 if (MacroNameTok.is(tok::eom))
Chris Lattner4b009652007-07-25 00:24:17 +00002321 return;
2322
2323 // Check to see if this is the last token on the #undef line.
2324 CheckEndOfDirective("#undef");
2325
2326 // Okay, we finally have a valid identifier to undef.
Chris Lattner7a1b0882007-10-07 08:44:20 +00002327 MacroInfo *MI = getMacroInfo(MacroNameTok.getIdentifierInfo());
Chris Lattner4b009652007-07-25 00:24:17 +00002328
Chris Lattner4b009652007-07-25 00:24:17 +00002329 // If the macro is not defined, this is a noop undef, just return.
2330 if (MI == 0) return;
2331
2332 if (!MI->isUsed())
2333 Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
2334
2335 // Free macro definition.
2336 delete MI;
Chris Lattner7a1b0882007-10-07 08:44:20 +00002337 setMacroInfo(MacroNameTok.getIdentifierInfo(), 0);
Chris Lattner4b009652007-07-25 00:24:17 +00002338}
2339
2340
2341//===----------------------------------------------------------------------===//
2342// Preprocessor Conditional Directive Handling.
2343//===----------------------------------------------------------------------===//
2344
2345/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is
2346/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true
2347/// if any tokens have been returned or pp-directives activated before this
2348/// #ifndef has been lexed.
2349///
2350void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef,
2351 bool ReadAnyTokensBeforeDirective) {
2352 ++NumIf;
2353 Token DirectiveTok = Result;
2354
2355 Token MacroNameTok;
2356 ReadMacroName(MacroNameTok);
2357
2358 // Error reading macro name? If so, diagnostic already issued.
Chris Lattnercb8e41c2007-10-09 18:02:16 +00002359 if (MacroNameTok.is(tok::eom)) {
Chris Lattnere6cdeb52007-09-24 05:14:57 +00002360 // Skip code until we get to #endif. This helps with recovery by not
2361 // emitting an error when the #endif is reached.
2362 SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
2363 /*Foundnonskip*/false, /*FoundElse*/false);
Chris Lattner4b009652007-07-25 00:24:17 +00002364 return;
Chris Lattnere6cdeb52007-09-24 05:14:57 +00002365 }
Chris Lattner4b009652007-07-25 00:24:17 +00002366
2367 // Check to see if this is the last token on the #if[n]def line.
2368 CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef");
Lauro Ramos Venanciod25c7e72008-02-25 19:03:15 +00002369
2370 if (CurLexer->getConditionalStackDepth() == 0) {
2371 // If the start of a top-level #ifdef, inform MIOpt.
2372 if (!ReadAnyTokensBeforeDirective) {
2373 assert(isIfndef && "#ifdef shouldn't reach here");
2374 CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
2375 } else
2376 CurLexer->MIOpt.EnterTopLevelConditional();
Chris Lattner4b009652007-07-25 00:24:17 +00002377 }
Lauro Ramos Venanciod25c7e72008-02-25 19:03:15 +00002378
Chris Lattner4b009652007-07-25 00:24:17 +00002379 IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
Chris Lattner7a1b0882007-10-07 08:44:20 +00002380 MacroInfo *MI = getMacroInfo(MII);
Chris Lattner4b009652007-07-25 00:24:17 +00002381
2382 // If there is a macro, process it.
Chris Lattnerfc457002008-03-05 01:18:20 +00002383 if (MI) // Mark it used.
Chris Lattner4b009652007-07-25 00:24:17 +00002384 MI->setIsUsed(true);
Chris Lattner4b009652007-07-25 00:24:17 +00002385
2386 // Should we include the stuff contained by this directive?
2387 if (!MI == isIfndef) {
2388 // Yes, remember that we are inside a conditional, then lex the next token.
2389 CurLexer->pushConditionalLevel(DirectiveTok.getLocation(), /*wasskip*/false,
2390 /*foundnonskip*/true, /*foundelse*/false);
2391 } else {
2392 // No, skip the contents of this block and return the first token after it.
2393 SkipExcludedConditionalBlock(DirectiveTok.getLocation(),
2394 /*Foundnonskip*/false,
2395 /*FoundElse*/false);
2396 }
2397}
2398
2399/// HandleIfDirective - Implements the #if directive.
2400///
2401void Preprocessor::HandleIfDirective(Token &IfToken,
2402 bool ReadAnyTokensBeforeDirective) {
2403 ++NumIf;
2404
2405 // Parse and evaluation the conditional expression.
2406 IdentifierInfo *IfNDefMacro = 0;
2407 bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro);
2408
2409 // Should we include the stuff contained by this directive?
2410 if (ConditionalTrue) {
2411 // If this condition is equivalent to #ifndef X, and if this is the first
2412 // directive seen, handle it for the multiple-include optimization.
Lauro Ramos Venanciod25c7e72008-02-25 19:03:15 +00002413 if (CurLexer->getConditionalStackDepth() == 0) {
2414 if (!ReadAnyTokensBeforeDirective && IfNDefMacro)
2415 CurLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro);
2416 else
2417 CurLexer->MIOpt.EnterTopLevelConditional();
2418 }
Chris Lattner4b009652007-07-25 00:24:17 +00002419
2420 // Yes, remember that we are inside a conditional, then lex the next token.
2421 CurLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
2422 /*foundnonskip*/true, /*foundelse*/false);
2423 } else {
2424 // No, skip the contents of this block and return the first token after it.
2425 SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false,
2426 /*FoundElse*/false);
2427 }
2428}
2429
2430/// HandleEndifDirective - Implements the #endif directive.
2431///
2432void Preprocessor::HandleEndifDirective(Token &EndifToken) {
2433 ++NumEndif;
2434
2435 // Check that this is the whole directive.
2436 CheckEndOfDirective("#endif");
2437
2438 PPConditionalInfo CondInfo;
2439 if (CurLexer->popConditionalLevel(CondInfo)) {
2440 // No conditionals on the stack: this is an #endif without an #if.
2441 return Diag(EndifToken, diag::err_pp_endif_without_if);
2442 }
2443
2444 // If this the end of a top-level #endif, inform MIOpt.
2445 if (CurLexer->getConditionalStackDepth() == 0)
Lauro Ramos Venancio4c446862008-02-25 19:08:51 +00002446 CurLexer->MIOpt.ExitTopLevelConditional();
Chris Lattner4b009652007-07-25 00:24:17 +00002447
2448 assert(!CondInfo.WasSkipping && !CurLexer->LexingRawMode &&
2449 "This code should only be reachable in the non-skipping case!");
2450}
2451
2452
2453void Preprocessor::HandleElseDirective(Token &Result) {
2454 ++NumElse;
2455
2456 // #else directive in a non-skipping conditional... start skipping.
2457 CheckEndOfDirective("#else");
2458
2459 PPConditionalInfo CI;
2460 if (CurLexer->popConditionalLevel(CI))
2461 return Diag(Result, diag::pp_err_else_without_if);
2462
2463 // If this is a top-level #else, inform the MIOpt.
2464 if (CurLexer->getConditionalStackDepth() == 0)
Lauro Ramos Venanciod25c7e72008-02-25 19:03:15 +00002465 CurLexer->MIOpt.EnterTopLevelConditional();
Chris Lattner4b009652007-07-25 00:24:17 +00002466
2467 // If this is a #else with a #else before it, report the error.
2468 if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
2469
2470 // Finally, skip the rest of the contents of this block and return the first
2471 // token after it.
2472 return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
2473 /*FoundElse*/true);
2474}
2475
2476void Preprocessor::HandleElifDirective(Token &ElifToken) {
2477 ++NumElse;
2478
2479 // #elif directive in a non-skipping conditional... start skipping.
2480 // We don't care what the condition is, because we will always skip it (since
2481 // the block immediately before it was included).
2482 DiscardUntilEndOfDirective();
2483
2484 PPConditionalInfo CI;
2485 if (CurLexer->popConditionalLevel(CI))
2486 return Diag(ElifToken, diag::pp_err_elif_without_if);
2487
2488 // If this is a top-level #elif, inform the MIOpt.
2489 if (CurLexer->getConditionalStackDepth() == 0)
Lauro Ramos Venanciod25c7e72008-02-25 19:03:15 +00002490 CurLexer->MIOpt.EnterTopLevelConditional();
Chris Lattner4b009652007-07-25 00:24:17 +00002491
2492 // If this is a #elif with a #else before it, report the error.
2493 if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
2494
2495 // Finally, skip the rest of the contents of this block and return the first
2496 // token after it.
2497 return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true,
2498 /*FoundElse*/CI.FoundElse);
2499}
2500