blob: 8218d0ac06eb26fa4137154b234de8aee2b2c794 [file] [log] [blame]
Chris Lattnerc7a39682008-03-09 03:13:06 +00001//===--- MacroExpansion.cpp - Top level Macro Expansion -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the top level handling of macro expansion for the
11// preprocessor.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/Preprocessor.h"
16#include "MacroArgs.h"
17#include "clang/Lex/MacroInfo.h"
18#include "clang/Basic/SourceManager.h"
19#include "clang/Basic/FileManager.h"
20#include "clang/Basic/Diagnostic.h"
21using namespace clang;
22
23/// setMacroInfo - Specify a macro for this identifier.
24///
25void Preprocessor::setMacroInfo(IdentifierInfo *II, MacroInfo *MI) {
26 if (MI == 0) {
27 if (II->hasMacroDefinition()) {
28 Macros.erase(II);
29 II->setHasMacroDefinition(false);
30 }
31 } else {
32 Macros[II] = MI;
33 II->setHasMacroDefinition(true);
34 }
35}
36
37/// RegisterBuiltinMacro - Register the specified identifier in the identifier
38/// table and mark it as a builtin macro to be expanded.
39IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) {
40 // Get the identifier.
41 IdentifierInfo *Id = getIdentifierInfo(Name);
42
43 // Mark it as being a macro that is builtin.
44 MacroInfo *MI = new MacroInfo(SourceLocation());
45 MI->setIsBuiltinMacro();
46 setMacroInfo(Id, MI);
47 return Id;
48}
49
50
51/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the
52/// identifier table.
53void Preprocessor::RegisterBuiltinMacros() {
54 Ident__LINE__ = RegisterBuiltinMacro("__LINE__");
55 Ident__FILE__ = RegisterBuiltinMacro("__FILE__");
56 Ident__DATE__ = RegisterBuiltinMacro("__DATE__");
57 Ident__TIME__ = RegisterBuiltinMacro("__TIME__");
58 Ident_Pragma = RegisterBuiltinMacro("_Pragma");
59
60 // GCC Extensions.
61 Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__");
62 Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__");
63 Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__");
64}
65
66/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token
67/// in its expansion, currently expands to that token literally.
68static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
69 const IdentifierInfo *MacroIdent,
70 Preprocessor &PP) {
71 IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo();
72
73 // If the token isn't an identifier, it's always literally expanded.
74 if (II == 0) return true;
75
76 // If the identifier is a macro, and if that macro is enabled, it may be
77 // expanded so it's not a trivial expansion.
78 if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
79 // Fast expanding "#define X X" is ok, because X would be disabled.
80 II != MacroIdent)
81 return false;
82
83 // If this is an object-like macro invocation, it is safe to trivially expand
84 // it.
85 if (MI->isObjectLike()) return true;
86
87 // If this is a function-like macro invocation, it's safe to trivially expand
88 // as long as the identifier is not a macro argument.
89 for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
90 I != E; ++I)
91 if (*I == II)
92 return false; // Identifier is a macro argument.
93
94 return true;
95}
96
97
98/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
99/// lexed is a '('. If so, consume the token and return true, if not, this
100/// method should have no observable side-effect on the lexed tokens.
101bool Preprocessor::isNextPPTokenLParen() {
102 // Do some quick tests for rejection cases.
103 unsigned Val;
104 if (CurLexer)
105 Val = CurLexer->isNextPPTokenLParen();
106 else
107 Val = CurTokenLexer->isNextTokenLParen();
108
109 if (Val == 2) {
110 // We have run off the end. If it's a source file we don't
111 // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the
112 // macro stack.
113 if (CurLexer)
114 return false;
115 for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
116 IncludeStackInfo &Entry = IncludeMacroStack[i-1];
117 if (Entry.TheLexer)
118 Val = Entry.TheLexer->isNextPPTokenLParen();
119 else
120 Val = Entry.TheTokenLexer->isNextTokenLParen();
121
122 if (Val != 2)
123 break;
124
125 // Ran off the end of a source file?
126 if (Entry.TheLexer)
127 return false;
128 }
129 }
130
131 // Okay, if we know that the token is a '(', lex it and return. Otherwise we
132 // have found something that isn't a '(' or we found the end of the
133 // translation unit. In either case, return false.
134 if (Val != 1)
135 return false;
136
137 Token Tok;
138 LexUnexpandedToken(Tok);
139 assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
140 return true;
141}
142
143/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
144/// expanded as a macro, handle it and return the next token as 'Identifier'.
145bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
146 MacroInfo *MI) {
147 // If this is a macro exapnsion in the "#if !defined(x)" line for the file,
148 // then the macro could expand to different things in other contexts, we need
149 // to disable the optimization in this case.
150 if (CurLexer) CurLexer->MIOpt.ExpandedMacro();
151
152 // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
153 if (MI->isBuiltinMacro()) {
154 ExpandBuiltinMacro(Identifier);
155 return false;
156 }
157
158 /// Args - If this is a function-like macro expansion, this contains,
159 /// for each macro argument, the list of tokens that were provided to the
160 /// invocation.
161 MacroArgs *Args = 0;
162
163 // If this is a function-like macro, read the arguments.
164 if (MI->isFunctionLike()) {
165 // C99 6.10.3p10: If the preprocessing token immediately after the the macro
166 // name isn't a '(', this macro should not be expanded. Otherwise, consume
167 // it.
168 if (!isNextPPTokenLParen())
169 return true;
170
171 // Remember that we are now parsing the arguments to a macro invocation.
172 // Preprocessor directives used inside macro arguments are not portable, and
173 // this enables the warning.
174 InMacroArgs = true;
175 Args = ReadFunctionLikeMacroArgs(Identifier, MI);
176
177 // Finished parsing args.
178 InMacroArgs = false;
179
180 // If there was an error parsing the arguments, bail out.
181 if (Args == 0) return false;
182
183 ++NumFnMacroExpanded;
184 } else {
185 ++NumMacroExpanded;
186 }
187
188 // Notice that this macro has been used.
189 MI->setIsUsed(true);
190
191 // If we started lexing a macro, enter the macro expansion body.
192
193 // If this macro expands to no tokens, don't bother to push it onto the
194 // expansion stack, only to take it right back off.
195 if (MI->getNumTokens() == 0) {
196 // No need for arg info.
197 if (Args) Args->destroy();
198
199 // Ignore this macro use, just return the next token in the current
200 // buffer.
201 bool HadLeadingSpace = Identifier.hasLeadingSpace();
202 bool IsAtStartOfLine = Identifier.isAtStartOfLine();
203
204 Lex(Identifier);
205
206 // If the identifier isn't on some OTHER line, inherit the leading
207 // whitespace/first-on-a-line property of this token. This handles
208 // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
209 // empty.
210 if (!Identifier.isAtStartOfLine()) {
211 if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
212 if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
213 }
214 ++NumFastMacroExpanded;
215 return false;
216
217 } else if (MI->getNumTokens() == 1 &&
218 isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
219 *this)){
220 // Otherwise, if this macro expands into a single trivially-expanded
221 // token: expand it now. This handles common cases like
222 // "#define VAL 42".
223
224 // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
225 // identifier to the expanded token.
226 bool isAtStartOfLine = Identifier.isAtStartOfLine();
227 bool hasLeadingSpace = Identifier.hasLeadingSpace();
228
229 // Remember where the token is instantiated.
230 SourceLocation InstantiateLoc = Identifier.getLocation();
231
232 // Replace the result token.
233 Identifier = MI->getReplacementToken(0);
234
235 // Restore the StartOfLine/LeadingSpace markers.
236 Identifier.setFlagValue(Token::StartOfLine , isAtStartOfLine);
237 Identifier.setFlagValue(Token::LeadingSpace, hasLeadingSpace);
238
239 // Update the tokens location to include both its logical and physical
240 // locations.
241 SourceLocation Loc =
242 SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc);
243 Identifier.setLocation(Loc);
244
245 // If this is #define X X, we must mark the result as unexpandible.
246 if (IdentifierInfo *NewII = Identifier.getIdentifierInfo())
247 if (getMacroInfo(NewII) == MI)
248 Identifier.setFlag(Token::DisableExpand);
249
250 // Since this is not an identifier token, it can't be macro expanded, so
251 // we're done.
252 ++NumFastMacroExpanded;
253 return false;
254 }
255
256 // Start expanding the macro.
257 EnterMacro(Identifier, Args);
258
259 // Now that the macro is at the top of the include stack, ask the
260 // preprocessor to read the next token from it.
261 Lex(Identifier);
262 return false;
263}
264
265/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is
266/// invoked to read all of the actual arguments specified for the macro
267/// invocation. This returns null on error.
268MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
269 MacroInfo *MI) {
270 // The number of fixed arguments to parse.
271 unsigned NumFixedArgsLeft = MI->getNumArgs();
272 bool isVariadic = MI->isVariadic();
273
274 // Outer loop, while there are more arguments, keep reading them.
275 Token Tok;
276 Tok.setKind(tok::comma);
277 --NumFixedArgsLeft; // Start reading the first arg.
278
279 // ArgTokens - Build up a list of tokens that make up each argument. Each
280 // argument is separated by an EOF token. Use a SmallVector so we can avoid
281 // heap allocations in the common case.
282 llvm::SmallVector<Token, 64> ArgTokens;
283
284 unsigned NumActuals = 0;
285 while (Tok.is(tok::comma)) {
286 // C99 6.10.3p11: Keep track of the number of l_parens we have seen. Note
287 // that we already consumed the first one.
288 unsigned NumParens = 0;
289
290 while (1) {
291 // Read arguments as unexpanded tokens. This avoids issues, e.g., where
292 // an argument value in a macro could expand to ',' or '(' or ')'.
293 LexUnexpandedToken(Tok);
294
295 if (Tok.is(tok::eof) || Tok.is(tok::eom)) { // "#if f(<eof>" & "#if f(\n"
296 Diag(MacroName, diag::err_unterm_macro_invoc);
297 // Do not lose the EOF/EOM. Return it to the client.
298 MacroName = Tok;
299 return 0;
300 } else if (Tok.is(tok::r_paren)) {
301 // If we found the ) token, the macro arg list is done.
302 if (NumParens-- == 0)
303 break;
304 } else if (Tok.is(tok::l_paren)) {
305 ++NumParens;
306 } else if (Tok.is(tok::comma) && NumParens == 0) {
307 // Comma ends this argument if there are more fixed arguments expected.
308 if (NumFixedArgsLeft)
309 break;
310
311 // If this is not a variadic macro, too many args were specified.
312 if (!isVariadic) {
313 // Emit the diagnostic at the macro name in case there is a missing ).
314 // Emitting it at the , could be far away from the macro name.
315 Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
316 return 0;
317 }
318 // Otherwise, continue to add the tokens to this variable argument.
319 } else if (Tok.is(tok::comment) && !KeepMacroComments) {
320 // If this is a comment token in the argument list and we're just in
321 // -C mode (not -CC mode), discard the comment.
322 continue;
323 } else if (Tok.is(tok::identifier)) {
324 // Reading macro arguments can cause macros that we are currently
325 // expanding from to be popped off the expansion stack. Doing so causes
326 // them to be reenabled for expansion. Here we record whether any
327 // identifiers we lex as macro arguments correspond to disabled macros.
328 // If so, we mark the token as noexpand. This is a subtle aspect of
329 // C99 6.10.3.4p2.
330 if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
331 if (!MI->isEnabled())
332 Tok.setFlag(Token::DisableExpand);
333 }
334
335 ArgTokens.push_back(Tok);
336 }
337
338 // Empty arguments are standard in C99 and supported as an extension in
339 // other modes.
340 if (ArgTokens.empty() && !Features.C99)
341 Diag(Tok, diag::ext_empty_fnmacro_arg);
342
343 // Add a marker EOF token to the end of the token list for this argument.
344 Token EOFTok;
345 EOFTok.startToken();
346 EOFTok.setKind(tok::eof);
347 EOFTok.setLocation(Tok.getLocation());
348 EOFTok.setLength(0);
349 ArgTokens.push_back(EOFTok);
350 ++NumActuals;
351 --NumFixedArgsLeft;
352 };
353
354 // Okay, we either found the r_paren. Check to see if we parsed too few
355 // arguments.
356 unsigned MinArgsExpected = MI->getNumArgs();
357
358 // See MacroArgs instance var for description of this.
359 bool isVarargsElided = false;
360
361 if (NumActuals < MinArgsExpected) {
362 // There are several cases where too few arguments is ok, handle them now.
363 if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) {
364 // Varargs where the named vararg parameter is missing: ok as extension.
365 // #define A(x, ...)
366 // A("blah")
367 Diag(Tok, diag::ext_missing_varargs_arg);
368
369 // Remember this occurred if this is a C99 macro invocation with at least
370 // one actual argument.
371 isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1;
372 } else if (MI->getNumArgs() == 1) {
373 // #define A(x)
374 // A()
375 // is ok because it is an empty argument.
376
377 // Empty arguments are standard in C99 and supported as an extension in
378 // other modes.
379 if (ArgTokens.empty() && !Features.C99)
380 Diag(Tok, diag::ext_empty_fnmacro_arg);
381 } else {
382 // Otherwise, emit the error.
383 Diag(Tok, diag::err_too_few_args_in_macro_invoc);
384 return 0;
385 }
386
387 // Add a marker EOF token to the end of the token list for this argument.
388 SourceLocation EndLoc = Tok.getLocation();
389 Tok.startToken();
390 Tok.setKind(tok::eof);
391 Tok.setLocation(EndLoc);
392 Tok.setLength(0);
393 ArgTokens.push_back(Tok);
394 }
395
396 return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided);
397}
398
399/// ComputeDATE_TIME - Compute the current time, enter it into the specified
400/// scratch buffer, then return DATELoc/TIMELoc locations with the position of
401/// the identifier tokens inserted.
402static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
403 Preprocessor &PP) {
404 time_t TT = time(0);
405 struct tm *TM = localtime(&TT);
406
407 static const char * const Months[] = {
408 "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"
409 };
410
411 char TmpBuffer[100];
412 sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
413 TM->tm_year+1900);
414 DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
415
416 sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
417 TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
418}
419
420/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
421/// as a builtin macro, handle it and return the next token as 'Tok'.
422void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
423 // Figure out which token this is.
424 IdentifierInfo *II = Tok.getIdentifierInfo();
425 assert(II && "Can't be a macro without id info!");
426
427 // If this is an _Pragma directive, expand it, invoke the pragma handler, then
428 // lex the token after it.
429 if (II == Ident_Pragma)
430 return Handle_Pragma(Tok);
431
432 ++NumBuiltinMacroExpanded;
433
434 char TmpBuffer[100];
435
436 // Set up the return result.
437 Tok.setIdentifierInfo(0);
438 Tok.clearFlag(Token::NeedsCleaning);
439
440 if (II == Ident__LINE__) {
441 // __LINE__ expands to a simple numeric value.
442 sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation()));
443 unsigned Length = strlen(TmpBuffer);
444 Tok.setKind(tok::numeric_constant);
445 Tok.setLength(Length);
446 Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
447 } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
448 SourceLocation Loc = Tok.getLocation();
449 if (II == Ident__BASE_FILE__) {
450 Diag(Tok, diag::ext_pp_base_file);
451 SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc);
452 while (NextLoc.isValid()) {
453 Loc = NextLoc;
454 NextLoc = SourceMgr.getIncludeLoc(Loc);
455 }
456 }
457
458 // Escape this filename. Turn '\' -> '\\' '"' -> '\"'
459 std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc));
460 FN = '"' + Lexer::Stringify(FN) + '"';
461 Tok.setKind(tok::string_literal);
462 Tok.setLength(FN.size());
463 Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
464 } else if (II == Ident__DATE__) {
465 if (!DATELoc.isValid())
466 ComputeDATE_TIME(DATELoc, TIMELoc, *this);
467 Tok.setKind(tok::string_literal);
468 Tok.setLength(strlen("\"Mmm dd yyyy\""));
469 Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation()));
470 } else if (II == Ident__TIME__) {
471 if (!TIMELoc.isValid())
472 ComputeDATE_TIME(DATELoc, TIMELoc, *this);
473 Tok.setKind(tok::string_literal);
474 Tok.setLength(strlen("\"hh:mm:ss\""));
475 Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation()));
476 } else if (II == Ident__INCLUDE_LEVEL__) {
477 Diag(Tok, diag::ext_pp_include_level);
478
479 // Compute the include depth of this token.
480 unsigned Depth = 0;
481 SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation());
482 for (; Loc.isValid(); ++Depth)
483 Loc = SourceMgr.getIncludeLoc(Loc);
484
485 // __INCLUDE_LEVEL__ expands to a simple numeric value.
486 sprintf(TmpBuffer, "%u", Depth);
487 unsigned Length = strlen(TmpBuffer);
488 Tok.setKind(tok::numeric_constant);
489 Tok.setLength(Length);
490 Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
491 } else if (II == Ident__TIMESTAMP__) {
492 // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
493 // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
494 Diag(Tok, diag::ext_pp_timestamp);
495
496 // Get the file that we are lexing out of. If we're currently lexing from
497 // a macro, dig into the include stack.
498 const FileEntry *CurFile = 0;
499 Lexer *TheLexer = getCurrentFileLexer();
500
501 if (TheLexer)
502 CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
503
504 // If this file is older than the file it depends on, emit a diagnostic.
505 const char *Result;
506 if (CurFile) {
507 time_t TT = CurFile->getModificationTime();
508 struct tm *TM = localtime(&TT);
509 Result = asctime(TM);
510 } else {
511 Result = "??? ??? ?? ??:??:?? ????\n";
512 }
513 TmpBuffer[0] = '"';
514 strcpy(TmpBuffer+1, Result);
515 unsigned Len = strlen(TmpBuffer);
516 TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
517 Tok.setKind(tok::string_literal);
518 Tok.setLength(Len);
519 Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation()));
520 } else {
521 assert(0 && "Unknown identifier!");
522 }
523}