blob: c09c2d9600fc6e6e5c57f4157f49b6106c5fc586 [file] [log] [blame]
Chris Lattner22eb9722006-06-18 05:43:12 +00001//===--- MacroExpander.cpp - Lex from a macro expansion -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the MacroExpander interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Lex/MacroExpander.h"
15#include "clang/Lex/MacroInfo.h"
16#include "clang/Lex/Preprocessor.h"
Chris Lattner30709b032006-06-21 03:01:55 +000017#include "clang/Basic/SourceManager.h"
Chris Lattner0707bd32006-07-15 05:23:58 +000018#include "clang/Basic/Diagnostic.h"
Chris Lattner22eb9722006-06-18 05:43:12 +000019using namespace llvm;
20using namespace clang;
21
Chris Lattner78186052006-07-09 00:45:31 +000022//===----------------------------------------------------------------------===//
Chris Lattneree8760b2006-07-15 07:42:55 +000023// MacroArgs Implementation
Chris Lattner78186052006-07-09 00:45:31 +000024//===----------------------------------------------------------------------===//
25
Chris Lattneree8760b2006-07-15 07:42:55 +000026MacroArgs::MacroArgs(const MacroInfo *MI) {
Chris Lattner78186052006-07-09 00:45:31 +000027 assert(MI->isFunctionLike() &&
Chris Lattneree8760b2006-07-15 07:42:55 +000028 "Can't have args for an object-like macro!");
Chris Lattner78186052006-07-09 00:45:31 +000029 // Reserve space for arguments to avoid reallocation.
30 unsigned NumArgs = MI->getNumArgs();
31 if (MI->isC99Varargs() || MI->isGNUVarargs())
32 NumArgs += 3; // Varargs can have more than this, just some guess.
33
Chris Lattneree8760b2006-07-15 07:42:55 +000034 UnexpArgTokens.reserve(NumArgs);
Chris Lattner78186052006-07-09 00:45:31 +000035}
36
Chris Lattneree8760b2006-07-15 07:42:55 +000037/// addArgument - Add an argument for this invocation. This method destroys
38/// the vector passed in to avoid extraneous memory copies. This adds the EOF
39/// token to the end of the argument list as a marker. 'Loc' specifies a
40/// location at the end of the argument, e.g. the ',' token or the ')'.
41void MacroArgs::addArgument(std::vector<LexerToken> &ArgToks,
42 SourceLocation Loc) {
43 UnexpArgTokens.push_back(std::vector<LexerToken>());
44 UnexpArgTokens.back().swap(ArgToks);
45
46 // Add a marker EOF token to the end of the argument list, useful for handling
47 // empty arguments and macro pre-expansion.
48 LexerToken EOFTok;
49 EOFTok.StartToken();
50 EOFTok.SetKind(tok::eof);
51 EOFTok.SetLocation(Loc);
Chris Lattner203b4562006-07-15 21:07:40 +000052 EOFTok.SetLength(0);
Chris Lattneree8760b2006-07-15 07:42:55 +000053 UnexpArgTokens.back().push_back(EOFTok);
54}
55
Chris Lattner203b4562006-07-15 21:07:40 +000056/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
57/// by pre-expansion, return false. Otherwise, conservatively return true.
58bool MacroArgs::ArgNeedsPreexpansion(unsigned ArgNo) const {
59 const std::vector<LexerToken> &ArgTokens = getUnexpArgument(ArgNo);
60
61 // If there are no identifiers in the argument list, or if the identifiers are
62 // known to not be macros, pre-expansion won't modify it.
63 for (unsigned i = 0, e = ArgTokens.size()-1; i != e; ++i)
64 if (IdentifierInfo *II = ArgTokens[i].getIdentifierInfo()) {
65 if (II->getMacroInfo() && II->getMacroInfo()->isEnabled())
66 // Return true even though the macro could be a function-like macro
67 // without a following '(' token.
68 return true;
69 }
70 return false;
71}
72
Chris Lattner7667d0d2006-07-16 18:16:58 +000073/// getPreExpArgument - Return the pre-expanded form of the specified
74/// argument.
75const std::vector<LexerToken> &
76MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
77 assert(Arg < UnexpArgTokens.size() && "Invalid argument number!");
78
79 // If we have already computed this, return it.
80 if (PreExpArgTokens.empty())
81 PreExpArgTokens.resize(UnexpArgTokens.size());
82
83 std::vector<LexerToken> &Result = PreExpArgTokens[Arg];
84 if (!Result.empty()) return Result;
85
86 // Otherwise, we have to pre-expand this argument, populating Result. To do
87 // this, we set up a fake MacroExpander to lex from the unexpanded argument
88 // list. With this installed, we lex expanded tokens until we hit the EOF
89 // token at the end of the unexp list.
90 PP.EnterTokenStream(UnexpArgTokens[Arg]);
91
92 // Lex all of the macro-expanded tokens into Result.
93 do {
94 Result.push_back(LexerToken());
95 PP.Lex(Result.back());
96 } while (Result.back().getKind() != tok::eof);
97
98 // Pop the token stream off the top of the stack. We know that the internal
99 // pointer inside of it is to the "end" of the token stream, but the stack
100 // will not otherwise be popped until the next token is lexed. The problem is
101 // that the token may be lexed sometime after the vector of tokens itself is
102 // destroyed, which would be badness.
103 PP.RemoveTopOfLexerStack();
104 return Result;
105}
106
Chris Lattneree8760b2006-07-15 07:42:55 +0000107
Chris Lattner0707bd32006-07-15 05:23:58 +0000108/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
109/// tokens into the literal string token that should be produced by the C #
110/// preprocessor operator.
111///
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000112static LexerToken StringifyArgument(const std::vector<LexerToken> &Toks,
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000113 Preprocessor &PP, bool Charify = false) {
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000114 LexerToken Tok;
115 Tok.StartToken();
116 Tok.SetKind(tok::string_literal);
Chris Lattner0707bd32006-07-15 05:23:58 +0000117
118 // Stringify all the tokens.
119 std::string Result = "\"";
Chris Lattner7667d0d2006-07-16 18:16:58 +0000120 // FIXME: Optimize this loop to not use std::strings.
Chris Lattner2ada5d32006-07-15 07:51:24 +0000121 for (unsigned i = 0, e = Toks.size()-1 /*no eof*/; i != e; ++i) {
Chris Lattner0707bd32006-07-15 05:23:58 +0000122 const LexerToken &Tok = Toks[i];
Chris Lattner0707bd32006-07-15 05:23:58 +0000123 if (i != 0 && Tok.hasLeadingSpace())
124 Result += ' ';
125
126 // If this is a string or character constant, escape the token as specified
127 // by 6.10.3.2p2.
128 if (Tok.getKind() == tok::string_literal || // "foo" and L"foo".
129 Tok.getKind() == tok::char_constant) { // 'x' and L'x'.
130 Result += Lexer::Stringify(PP.getSpelling(Tok));
131 } else {
132 // Otherwise, just append the token.
133 Result += PP.getSpelling(Tok);
134 }
135 }
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000136
Chris Lattner0707bd32006-07-15 05:23:58 +0000137 // If the last character of the string is a \, and if it isn't escaped, this
138 // is an invalid string literal, diagnose it as specified in C99.
139 if (Result[Result.size()-1] == '\\') {
140 // Count the number of consequtive \ characters. If even, then they are
141 // just escaped backslashes, otherwise it's an error.
142 unsigned FirstNonSlash = Result.size()-2;
143 // Guaranteed to find the starting " if nothing else.
144 while (Result[FirstNonSlash] == '\\')
145 --FirstNonSlash;
146 if ((Result.size()-1-FirstNonSlash) & 1) {
Chris Lattnerf2781502006-07-15 05:27:44 +0000147 // Diagnose errors for things like: #define F(X) #X / F(\)
Chris Lattner0707bd32006-07-15 05:23:58 +0000148 PP.Diag(Toks.back(), diag::pp_invalid_string_literal);
149 Result.erase(Result.end()-1); // remove one of the \'s.
150 }
151 }
Chris Lattner0707bd32006-07-15 05:23:58 +0000152 Result += '"';
153
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000154 // If this is the charify operation and the result is not a legal character
155 // constant, diagnose it.
156 if (Charify) {
157 // First step, turn double quotes into single quotes:
158 Result[0] = '\'';
159 Result[Result.size()-1] = '\'';
160
161 // Check for bogus character.
162 bool isBad = false;
Chris Lattner2ada5d32006-07-15 07:51:24 +0000163 if (Result.size() == 3) {
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000164 isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
165 } else {
166 isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
167 }
168
169 if (isBad) {
Chris Lattner7c581492006-07-15 07:56:31 +0000170 assert(!Toks.empty() && "No eof token at least?");
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000171 PP.Diag(Toks[0], diag::err_invalid_character_to_charify);
Chris Lattner7c581492006-07-15 07:56:31 +0000172 Result = "' '"; // Use something arbitrary, but legal.
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000173 }
174 }
175
Chris Lattner0707bd32006-07-15 05:23:58 +0000176 Tok.SetLength(Result.size());
177 Tok.SetLocation(PP.CreateString(&Result[0], Result.size()));
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000178 return Tok;
179}
180
181/// getStringifiedArgument - Compute, cache, and return the specified argument
182/// that has been 'stringified' as required by the # operator.
Chris Lattneree8760b2006-07-15 07:42:55 +0000183const LexerToken &MacroArgs::getStringifiedArgument(unsigned ArgNo,
184 Preprocessor &PP) {
Chris Lattner2ada5d32006-07-15 07:51:24 +0000185 assert(ArgNo < UnexpArgTokens.size() && "Invalid argument number!");
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000186 if (StringifiedArgs.empty()) {
Chris Lattner2ada5d32006-07-15 07:51:24 +0000187 StringifiedArgs.resize(getNumArguments());
Chris Lattneree8760b2006-07-15 07:42:55 +0000188 memset(&StringifiedArgs[0], 0,
189 sizeof(StringifiedArgs[0])*getNumArguments());
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000190 }
191 if (StringifiedArgs[ArgNo].getKind() != tok::string_literal)
Chris Lattner2ada5d32006-07-15 07:51:24 +0000192 StringifiedArgs[ArgNo] = StringifyArgument(UnexpArgTokens[ArgNo], PP);
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000193 return StringifiedArgs[ArgNo];
194}
195
Chris Lattner78186052006-07-09 00:45:31 +0000196//===----------------------------------------------------------------------===//
197// MacroExpander Implementation
198//===----------------------------------------------------------------------===//
199
Chris Lattner7667d0d2006-07-16 18:16:58 +0000200/// Create a macro expander for the specified macro with the specified actual
201/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
Chris Lattneree8760b2006-07-15 07:42:55 +0000202MacroExpander::MacroExpander(LexerToken &Tok, MacroArgs *Actuals,
Chris Lattner78186052006-07-09 00:45:31 +0000203 Preprocessor &pp)
Chris Lattner7667d0d2006-07-16 18:16:58 +0000204 : Macro(Tok.getIdentifierInfo()->getMacroInfo()),
Chris Lattneree8760b2006-07-15 07:42:55 +0000205 ActualArgs(Actuals), PP(pp), CurToken(0),
Chris Lattner50b497e2006-06-18 16:32:35 +0000206 InstantiateLoc(Tok.getLocation()),
Chris Lattnerd01e2912006-06-18 16:22:51 +0000207 AtStartOfLine(Tok.isAtStartOfLine()),
208 HasLeadingSpace(Tok.hasLeadingSpace()) {
Chris Lattner7667d0d2006-07-16 18:16:58 +0000209 MacroTokens = &Macro->getReplacementTokens();
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000210
211 // If this is a function-like macro, expand the arguments and change
212 // MacroTokens to point to the expanded tokens.
Chris Lattner7667d0d2006-07-16 18:16:58 +0000213 if (Macro->isFunctionLike() && Macro->getNumArgs())
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000214 ExpandFunctionArguments();
Chris Lattner7667d0d2006-07-16 18:16:58 +0000215
216 // Mark the macro as currently disabled, so that it is not recursively
217 // expanded. The macro must be disabled only after argument pre-expansion of
218 // function-like macro arguments occurs.
219 Macro->DisableMacro();
Chris Lattnerd01e2912006-06-18 16:22:51 +0000220}
221
Chris Lattner7667d0d2006-07-16 18:16:58 +0000222/// Create a macro expander for the specified token stream. This does not
223/// take ownership of the specified token vector.
224MacroExpander::MacroExpander(const std::vector<LexerToken> &TokStream,
225 Preprocessor &pp)
226 : Macro(0), ActualArgs(0), PP(pp), MacroTokens(&TokStream), CurToken(0),
227 InstantiateLoc(SourceLocation()), AtStartOfLine(false),
228 HasLeadingSpace(false) {
229
230 // Set HasLeadingSpace/AtStartOfLine so that the first token will be
231 // returned unmodified.
232 if (!TokStream.empty()) {
233 AtStartOfLine = TokStream[0].isAtStartOfLine();
234 HasLeadingSpace = TokStream[0].hasLeadingSpace();
235 }
236}
237
238
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000239MacroExpander::~MacroExpander() {
240 // If this was a function-like macro that actually uses its arguments, delete
241 // the expanded tokens.
Chris Lattner7667d0d2006-07-16 18:16:58 +0000242 if (Macro && MacroTokens != &Macro->getReplacementTokens())
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000243 delete MacroTokens;
244
245 // MacroExpander owns its formal arguments.
Chris Lattneree8760b2006-07-15 07:42:55 +0000246 delete ActualArgs;
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000247}
248
Chris Lattneree8760b2006-07-15 07:42:55 +0000249
250
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000251/// Expand the arguments of a function-like macro so that we can quickly
252/// return preexpanded tokens from MacroTokens.
253void MacroExpander::ExpandFunctionArguments() {
254 std::vector<LexerToken> ResultToks;
255
256 // Loop through the MacroTokens tokens, expanding them into ResultToks. Keep
257 // track of whether we change anything. If not, no need to keep them. If so,
258 // we install the newly expanded sequence as MacroTokens.
259 bool MadeChange = false;
260 for (unsigned i = 0, e = MacroTokens->size(); i != e; ++i) {
261 // If we found the stringify operator, get the argument stringified. The
262 // preprocessor already verified that the following token is a macro name
263 // when the #define was parsed.
264 const LexerToken &CurTok = (*MacroTokens)[i];
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000265 if (CurTok.getKind() == tok::hash || CurTok.getKind() == tok::hashat) {
Chris Lattner7667d0d2006-07-16 18:16:58 +0000266 int ArgNo =Macro->getArgumentNum((*MacroTokens)[i+1].getIdentifierInfo());
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000267 assert(ArgNo != -1 && "Token following # is not an argument?");
268
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000269 if (CurTok.getKind() == tok::hash) // Stringify
Chris Lattneree8760b2006-07-15 07:42:55 +0000270 ResultToks.push_back(ActualArgs->getStringifiedArgument(ArgNo, PP));
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000271 else {
272 // 'charify': don't bother caching these.
273 ResultToks.push_back(StringifyArgument(
Chris Lattneree8760b2006-07-15 07:42:55 +0000274 ActualArgs->getUnexpArgument(ArgNo), PP, true));
Chris Lattnerc783d1d2006-07-15 06:11:25 +0000275 }
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000276
Chris Lattner60161692006-07-15 06:48:02 +0000277 // The stringified/charified string leading space flag gets set to match
278 // the #/#@ operator.
279 if (CurTok.hasLeadingSpace())
280 ResultToks.back().SetFlag(LexerToken::LeadingSpace);
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000281
282 MadeChange = true;
283 ++i; // Skip arg name.
284 } else {
Chris Lattner203b4562006-07-15 21:07:40 +0000285 // Otherwise, if this is not an argument token, just add the token to the
286 // output buffer.
287 IdentifierInfo *II = CurTok.getIdentifierInfo();
Chris Lattner7667d0d2006-07-16 18:16:58 +0000288 int ArgNo = II ? Macro->getArgumentNum(II) : -1;
Chris Lattner203b4562006-07-15 21:07:40 +0000289 if (ArgNo == -1) {
290 ResultToks.push_back(CurTok);
291 continue;
292 }
293
294 // An argument is expanded somehow, the result is different than the
295 // input.
296 MadeChange = true;
297
298 // Otherwise, this is a use of the argument. Find out if there is a paste
299 // (##) operator before or after the argument.
300 bool PasteBefore =
301 !ResultToks.empty() && ResultToks.back().getKind() == tok::hashhash;
302 bool PasteAfter =
303 i+1 != e && (*MacroTokens)[i+1].getKind() == tok::hashhash;
304
305 // If it is not the LHS/RHS of a ## operator, we must pre-expand the
306 // argument and substitute the expanded tokens into the result. This is
307 // C99 6.10.3.1p1.
308 if (!PasteBefore && !PasteAfter) {
309 const std::vector<LexerToken> *ArgToks;
310 // Only preexpand the argument if it could possibly need it. This
311 // avoids some work in common cases.
Chris Lattner7667d0d2006-07-16 18:16:58 +0000312 if (ActualArgs->ArgNeedsPreexpansion(ArgNo))
313 ArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP);
314 else
Chris Lattner203b4562006-07-15 21:07:40 +0000315 ArgToks = &ActualArgs->getUnexpArgument(ArgNo);
Chris Lattner203b4562006-07-15 21:07:40 +0000316
317 unsigned FirstTok = ResultToks.size();
318 ResultToks.insert(ResultToks.end(), ArgToks->begin(), ArgToks->end()-1);
319
320 // If any tokens were substituted from the argument, the whitespace
321 // before the first token should match the whitespace of the arg
322 // identifier.
323 if (FirstTok != ResultToks.size())
324 ResultToks[FirstTok].SetFlagValue(LexerToken::LeadingSpace,
325 CurTok.hasLeadingSpace());
326 continue;
327 }
328
Chris Lattner7667d0d2006-07-16 18:16:58 +0000329 // FIXME: Handle comma swallowing GNU extension.
330
331 // FIXME: handle pasted args. Handle 'placemarker' stuff.
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000332 ResultToks.push_back(CurTok);
333 }
334 }
335
336 // If anything changed, install this as the new MacroTokens list.
337 if (MadeChange) {
338 // This is deleted in the dtor.
339 std::vector<LexerToken> *Res = new std::vector<LexerToken>();
340 Res->swap(ResultToks);
341 MacroTokens = Res;
342 }
343}
Chris Lattner67b07cb2006-06-26 02:03:42 +0000344
Chris Lattner22eb9722006-06-18 05:43:12 +0000345/// Lex - Lex and return a token from this macro stream.
Chris Lattnerd01e2912006-06-18 16:22:51 +0000346///
Chris Lattnercb283342006-06-18 06:48:37 +0000347void MacroExpander::Lex(LexerToken &Tok) {
Chris Lattner22eb9722006-06-18 05:43:12 +0000348 // Lexing off the end of the macro, pop this macro off the expansion stack.
Chris Lattner7667d0d2006-07-16 18:16:58 +0000349 if (isAtEnd()) {
350 // If this is a macro (not a token stream), mark the macro enabled now
351 // that it is no longer being expanded.
352 if (Macro) Macro->EnableMacro();
353
354 // Pop this context off the preprocessors lexer stack and get the next
Chris Lattner2183a6e2006-07-18 06:36:12 +0000355 // token. This will delete "this" so remember the PP instance var.
356 Preprocessor &PPCache = PP;
357 if (PP.HandleEndOfMacro(Tok))
358 return;
359
360 // HandleEndOfMacro may not return a token. If it doesn't, lex whatever is
361 // next.
362 return PPCache.Lex(Tok);
Chris Lattner7667d0d2006-07-16 18:16:58 +0000363 }
Chris Lattner22eb9722006-06-18 05:43:12 +0000364
365 // Get the next token to return.
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000366 Tok = (*MacroTokens)[CurToken++];
Chris Lattner22eb9722006-06-18 05:43:12 +0000367
Chris Lattnerc673f902006-06-30 06:10:41 +0000368 // The token's current location indicate where the token was lexed from. We
369 // need this information to compute the spelling of the token, but any
370 // diagnostics for the expanded token should appear as if they came from
371 // InstantiationLoc. Pull this information together into a new SourceLocation
372 // that captures all of this.
Chris Lattner7667d0d2006-07-16 18:16:58 +0000373 if (InstantiateLoc.isValid()) { // Don't do this for token streams.
374 SourceManager &SrcMgr = PP.getSourceManager();
375 // The token could have come from a prior macro expansion. In that case,
376 // ignore the macro expand part to get to the physloc. This happens for
377 // stuff like: #define A(X) X A(A(X)) A(1)
378 SourceLocation PhysLoc = SrcMgr.getPhysicalLoc(Tok.getLocation());
379 Tok.SetLocation(SrcMgr.getInstantiationLoc(PhysLoc, InstantiateLoc));
380 }
381
Chris Lattner22eb9722006-06-18 05:43:12 +0000382 // If this is the first token, set the lexical properties of the token to
383 // match the lexical properties of the macro identifier.
384 if (CurToken == 1) {
385 Tok.SetFlagValue(LexerToken::StartOfLine , AtStartOfLine);
386 Tok.SetFlagValue(LexerToken::LeadingSpace, HasLeadingSpace);
387 }
388
389 // Handle recursive expansion!
390 if (Tok.getIdentifierInfo())
391 return PP.HandleIdentifier(Tok);
392
393 // Otherwise, return a normal token.
Chris Lattner22eb9722006-06-18 05:43:12 +0000394}
Chris Lattnerafe603f2006-07-11 04:02:46 +0000395
Chris Lattnerd8aee0e2006-07-11 05:04:55 +0000396/// isNextTokenLParen - If the next token lexed will pop this macro off the
397/// expansion stack, return 2. If the next unexpanded token is a '(', return
398/// 1, otherwise return 0.
399unsigned MacroExpander::isNextTokenLParen() const {
Chris Lattnerafe603f2006-07-11 04:02:46 +0000400 // Out of tokens?
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000401 if (isAtEnd())
Chris Lattnerd8aee0e2006-07-11 05:04:55 +0000402 return 2;
Chris Lattnerb935d8c2006-07-14 06:54:44 +0000403 return (*MacroTokens)[CurToken].getKind() == tok::l_paren;
Chris Lattnerafe603f2006-07-11 04:02:46 +0000404}