blob: d0abb085f947cdb835dd18466f5538199c04d7b7 [file] [log] [blame]
Chris Lattner5b183d82006-11-10 05:03:26 +00001//===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements semantic analysis for expressions.
11//
12//===----------------------------------------------------------------------===//
13
14#include "Sema.h"
Chris Lattnercb6a3822006-11-10 06:20:45 +000015#include "clang/AST/ASTContext.h"
Chris Lattner17ed4872006-11-20 04:58:19 +000016#include "clang/AST/Decl.h"
Chris Lattner5b183d82006-11-10 05:03:26 +000017#include "clang/AST/Expr.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Basic/Diagnostic.h"
Chris Lattnerac18be92006-11-20 06:49:47 +000020#include "clang/Basic/LangOptions.h"
Chris Lattner5b183d82006-11-10 05:03:26 +000021#include "clang/Basic/TargetInfo.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/ADT/StringExtras.h"
24using namespace llvm;
25using namespace clang;
26
27
28
/// HexDigitValue - Translate one hexadecimal digit character ('0'-'9',
/// 'a'-'f' or 'A'-'F') into its numeric value, 0 through 15.  Every other
/// character yields -1.
static int HexDigitValue(char C) {
  int Value = -1;  // Result for anything that is not a hex digit.
  if (C >= '0' && C <= '9')
    Value = C - '0';
  else if (C >= 'a' && C <= 'f')
    Value = 10 + (C - 'a');
  else if (C >= 'A' && C <= 'F')
    Value = 10 + (C - 'A');
  return Value;
}
37
38/// ParseStringExpr - The specified tokens were lexed as pasted string
39/// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string
40/// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
41/// multiple tokens. However, the common case is that StringToks points to one
42/// string.
43///
44Action::ExprResult
45Sema::ParseStringExpr(const LexerToken *StringToks, unsigned NumStringToks) {
46 assert(NumStringToks && "Must have at least one string!");
47
48 // Scan all of the string portions, remember the max individual token length,
49 // computing a bound on the concatenated string length, and see whether any
50 // piece is a wide-string. If any of the string portions is a wide-string
51 // literal, the result is a wide-string literal [C99 6.4.5p4].
52 unsigned MaxTokenLength = StringToks[0].getLength();
53 unsigned SizeBound = StringToks[0].getLength()-2; // -2 for "".
54 bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
55
56 // The common case is that there is only one string fragment.
57 for (unsigned i = 1; i != NumStringToks; ++i) {
58 // The string could be shorter than this if it needs cleaning, but this is a
59 // reasonable bound, which is all we need.
60 SizeBound += StringToks[i].getLength()-2; // -2 for "".
61
62 // Remember maximum string piece length.
63 if (StringToks[i].getLength() > MaxTokenLength)
64 MaxTokenLength = StringToks[i].getLength();
65
66 // Remember if we see any wide strings.
67 AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
68 }
69
70
71 // Include space for the null terminator.
72 ++SizeBound;
73
74 // TODO: K&R warning: "traditional C rejects string constant concatenation"
75
76 // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
77 // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
78 unsigned wchar_tByteWidth = ~0U;
79 if (AnyWide)
Chris Lattnerddc135e2006-11-10 06:34:16 +000080 wchar_tByteWidth =Context.Target.getWCharWidth(StringToks[0].getLocation());
Chris Lattner5b183d82006-11-10 05:03:26 +000081
82 // The output buffer size needs to be large enough to hold wide characters.
83 // This is a worst-case assumption which basically corresponds to L"" "long".
84 if (AnyWide)
85 SizeBound *= wchar_tByteWidth;
86
87 // Create a temporary buffer to hold the result string data.
88 SmallString<512> ResultBuf;
89 ResultBuf.resize(SizeBound);
90
91 // Likewise, but for each string piece.
92 SmallString<512> TokenBuf;
93 TokenBuf.resize(MaxTokenLength);
94
95 // Loop over all the strings, getting their spelling, and expanding them to
96 // wide strings as appropriate.
97 char *ResultPtr = &ResultBuf[0]; // Next byte to fill in.
98
99 for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
100 const char *ThisTokBuf = &TokenBuf[0];
101 // Get the spelling of the token, which eliminates trigraphs, etc. We know
102 // that ThisTokBuf points to a buffer that is big enough for the whole token
103 // and 'spelled' tokens can only shrink.
Chris Lattnercb6a3822006-11-10 06:20:45 +0000104 unsigned ThisTokLen = Context.PP.getSpelling(StringToks[i], ThisTokBuf);
Chris Lattner5b183d82006-11-10 05:03:26 +0000105 const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
106
107 // TODO: Input character set mapping support.
108
109 // Skip L marker for wide strings.
110 if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
111
112 assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
113 ++ThisTokBuf;
114
115 while (ThisTokBuf != ThisTokEnd) {
116 // Is this a span of non-escape characters?
117 if (ThisTokBuf[0] != '\\') {
118 const char *InStart = ThisTokBuf;
119 do {
120 ++ThisTokBuf;
121 } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
122
123 // Copy the character span over.
124 unsigned Len = ThisTokBuf-InStart;
125 if (!AnyWide) {
126 memcpy(ResultPtr, InStart, Len);
127 ResultPtr += Len;
128 } else {
129 // Note: our internal rep of wide char tokens is always little-endian.
130 for (; Len; --Len, ++InStart) {
131 *ResultPtr++ = InStart[0];
132 // Add zeros at the end.
133 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
134 *ResultPtr++ = 0;
135 }
136 }
137 continue;
138 }
139
140 // Otherwise, this is an escape character. Skip the '\' char.
141 ++ThisTokBuf;
142
143 // We know that this character can't be off the end of the buffer, because
144 // that would have been \", which would not have been the end of string.
145 unsigned ResultChar = *ThisTokBuf++;
146 switch (ResultChar) {
147 // These map to themselves.
148 case '\\': case '\'': case '"': case '?': break;
149
150 // These have fixed mappings.
151 case 'a':
152 // TODO: K&R: the meaning of '\\a' is different in traditional C
153 ResultChar = 7;
154 break;
155 case 'b':
156 ResultChar = 8;
157 break;
158 case 'e':
Chris Lattnercb6a3822006-11-10 06:20:45 +0000159 Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e");
Chris Lattner5b183d82006-11-10 05:03:26 +0000160 ResultChar = 27;
161 break;
162 case 'f':
163 ResultChar = 12;
164 break;
165 case 'n':
166 ResultChar = 10;
167 break;
168 case 'r':
169 ResultChar = 13;
170 break;
171 case 't':
172 ResultChar = 9;
173 break;
174 case 'v':
175 ResultChar = 11;
176 break;
177
178 //case 'u': case 'U': // FIXME: UCNs.
179 case 'x': // Hex escape.
180 if (ThisTokBuf == ThisTokEnd ||
181 (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
Chris Lattnercb6a3822006-11-10 06:20:45 +0000182 Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits);
Chris Lattner5b183d82006-11-10 05:03:26 +0000183 ResultChar = 0;
184 break;
185 }
186 ++ThisTokBuf; // Consumed one hex digit.
187
188 assert(0 && "hex escape: unimp!");
189 break;
190 case '0': case '1': case '2': case '3':
191 case '4': case '5': case '6': case '7':
192 // Octal escapes.
193 assert(0 && "octal escape: unimp!");
194 break;
195
196 // Otherwise, these are not valid escapes.
197 case '(': case '{': case '[': case '%':
198 // GCC accepts these as extensions. We warn about them as such though.
Chris Lattnercb6a3822006-11-10 06:20:45 +0000199 if (!Context.PP.getLangOptions().NoExtensions) {
200 Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape,
201 std::string()+(char)ResultChar);
Chris Lattner5b183d82006-11-10 05:03:26 +0000202 break;
203 }
204 // FALL THROUGH.
205 default:
206 if (isgraph(ThisTokBuf[0])) {
Chris Lattnercb6a3822006-11-10 06:20:45 +0000207 Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
208 std::string()+(char)ResultChar);
Chris Lattner5b183d82006-11-10 05:03:26 +0000209 } else {
Chris Lattnercb6a3822006-11-10 06:20:45 +0000210 Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
211 "x"+utohexstr(ResultChar));
Chris Lattner5b183d82006-11-10 05:03:26 +0000212 }
213 }
214
215 // Note: our internal rep of wide char tokens is always little-endian.
216 *ResultPtr++ = ResultChar & 0xFF;
217
218 if (AnyWide) {
219 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
220 *ResultPtr++ = ResultChar >> i*8;
221 }
222 }
223 }
224
225 // Add zero terminator.
226 *ResultPtr = 0;
227 if (AnyWide) {
228 for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
229 *ResultPtr++ = 0;
230 }
231
232 SmallVector<SourceLocation, 4> StringTokLocs;
233 for (unsigned i = 0; i != NumStringToks; ++i)
234 StringTokLocs.push_back(StringToks[i].getLocation());
235
236 // FIXME: use factory.
237
238 // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
239 return new StringExpr(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
240}
241
Chris Lattnere168f762006-11-10 05:29:30 +0000242
Chris Lattnerac18be92006-11-20 06:49:47 +0000243/// ParseIdentifierExpr - The parser read an identifier in expression context,
244/// validate it per-C99 6.5.1. HasTrailingLParen indicates whether this
245/// identifier is used in an function call context.
246Sema::ExprResult Sema::ParseIdentifierExpr(Scope *S, SourceLocation Loc,
247 IdentifierInfo &II,
248 bool HasTrailingLParen) {
Chris Lattner17ed4872006-11-20 04:58:19 +0000249 // Could be enum-constant or decl.
250 Decl *D = II.getFETokenInfo<Decl>();
251 if (D == 0) {
Chris Lattnerac18be92006-11-20 06:49:47 +0000252 // FIXME: check to see if this is a use of a builtin. By handling builtins
253 // here, we can avoid having to preload tons of decls for functions.
254
255
256 // Otherwise, this is an imlicitly declared function reference (legal in
257 // C90, extension in C99).
258 if (HasTrailingLParen &&
259 // Not in C++.
260 !getLangOptions().CPlusPlus) {
261 D = ImplicitlyDefineFunction(Loc, II, S);
262 } else {
263 // If this name wasn't predeclared and if this is not a function call,
264 // diagnose the problem.
265 Diag(Loc, diag::err_undeclared_var_use, II.getName());
266 return true;
267 }
Chris Lattner17ed4872006-11-20 04:58:19 +0000268 }
269
Chris Lattner32d920b2007-01-26 02:01:53 +0000270 if (isa<TypedefDecl>(D)) {
Chris Lattner17ed4872006-11-20 04:58:19 +0000271 Diag(Loc, diag::err_unexpected_typedef, II.getName());
272 return true;
273 }
274
Chris Lattner5efbb332006-11-20 05:01:40 +0000275 return new DeclRefExpr(D);
Chris Lattner17ed4872006-11-20 04:58:19 +0000276}
Chris Lattnere168f762006-11-10 05:29:30 +0000277
Chris Lattner17ed4872006-11-20 04:58:19 +0000278Sema::ExprResult Sema::ParseSimplePrimaryExpr(SourceLocation Loc,
279 tok::TokenKind Kind) {
Chris Lattnere168f762006-11-10 05:29:30 +0000280 switch (Kind) {
281 default:
282 assert(0 && "Unknown simple primary expr!");
Chris Lattnere168f762006-11-10 05:29:30 +0000283 case tok::char_constant: // constant: character-constant
Chris Lattner17ed4872006-11-20 04:58:19 +0000284 // TODO: MOVE this to be some other callback.
Chris Lattnere168f762006-11-10 05:29:30 +0000285 case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2]
286 case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU]
287 case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU]
Chris Lattner17ed4872006-11-20 04:58:19 +0000288 return 0;
Chris Lattnere168f762006-11-10 05:29:30 +0000289 }
290}
291
Chris Lattner17ed4872006-11-20 04:58:19 +0000292Sema::ExprResult Sema::ParseIntegerConstant(SourceLocation Loc) {
Chris Lattnere168f762006-11-10 05:29:30 +0000293 return new IntegerConstant();
294}
Chris Lattner17ed4872006-11-20 04:58:19 +0000295Sema::ExprResult Sema::ParseFloatingConstant(SourceLocation Loc) {
Chris Lattnere168f762006-11-10 05:29:30 +0000296 return new FloatingConstant();
297}
298
299Action::ExprResult Sema::ParseParenExpr(SourceLocation L, SourceLocation R,
300 ExprTy *Val) {
301 return Val;
302}
303
304
305// Unary Operators. 'Tok' is the token for the operator.
306Action::ExprResult Sema::ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op,
307 ExprTy *Input) {
308 UnaryOperator::Opcode Opc;
309 switch (Op) {
310 default: assert(0 && "Unknown unary op!");
311 case tok::plusplus: Opc = UnaryOperator::PreInc; break;
312 case tok::minusminus: Opc = UnaryOperator::PreDec; break;
313 case tok::amp: Opc = UnaryOperator::AddrOf; break;
314 case tok::star: Opc = UnaryOperator::Deref; break;
315 case tok::plus: Opc = UnaryOperator::Plus; break;
316 case tok::minus: Opc = UnaryOperator::Minus; break;
317 case tok::tilde: Opc = UnaryOperator::Not; break;
318 case tok::exclaim: Opc = UnaryOperator::LNot; break;
319 case tok::kw_sizeof: Opc = UnaryOperator::SizeOf; break;
320 case tok::kw___alignof: Opc = UnaryOperator::AlignOf; break;
321 case tok::kw___real: Opc = UnaryOperator::Real; break;
322 case tok::kw___imag: Opc = UnaryOperator::Imag; break;
323 case tok::ampamp: Opc = UnaryOperator::AddrLabel; break;
324 case tok::kw___extension__:
325 return Input;
326 //Opc = UnaryOperator::Extension;
327 //break;
328 }
329
330 return new UnaryOperator((Expr*)Input, Opc);
331}
332
333Action::ExprResult Sema::
334ParseSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof,
335 SourceLocation LParenLoc, TypeTy *Ty,
336 SourceLocation RParenLoc) {
Chris Lattner0d8b1a12006-11-20 04:34:45 +0000337 // If error parsing type, ignore.
338 if (Ty == 0) return true;
Chris Lattner6531c102007-01-23 22:29:49 +0000339
340 // Verify that this is a valid expression.
341 TypeRef ArgTy = TypeRef::getFromOpaquePtr(Ty);
342
343 if (isa<FunctionType>(ArgTy) && isSizeof) {
344 // alignof(function) is allowed.
345 Diag(OpLoc, diag::ext_sizeof_function_type);
346 return new IntegerConstant(/*1*/);
347 } else if (ArgTy->isVoidType()) {
348 Diag(OpLoc, diag::ext_sizeof_void_type, isSizeof ? "sizeof" : "__alignof");
349 } else if (ArgTy->isIncompleteType()) {
350 std::string TypeName;
351 ArgTy->getAsString(TypeName);
352 Diag(OpLoc, isSizeof ? diag::err_sizeof_incomplete_type :
353 diag::err_alignof_incomplete_type, TypeName);
354 return new IntegerConstant(/*0*/);
355 }
356
357 return new SizeOfAlignOfTypeExpr(isSizeof, ArgTy);
Chris Lattnere168f762006-11-10 05:29:30 +0000358}
359
360
361Action::ExprResult Sema::ParsePostfixUnaryOp(SourceLocation OpLoc,
362 tok::TokenKind Kind,
363 ExprTy *Input) {
364 UnaryOperator::Opcode Opc;
365 switch (Kind) {
366 default: assert(0 && "Unknown unary op!");
367 case tok::plusplus: Opc = UnaryOperator::PostInc; break;
368 case tok::minusminus: Opc = UnaryOperator::PostDec; break;
369 }
370
371 return new UnaryOperator((Expr*)Input, Opc);
372}
373
374Action::ExprResult Sema::
375ParseArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc,
376 ExprTy *Idx, SourceLocation RLoc) {
377 return new ArraySubscriptExpr((Expr*)Base, (Expr*)Idx);
378}
379
380Action::ExprResult Sema::
381ParseMemberReferenceExpr(ExprTy *Base, SourceLocation OpLoc,
382 tok::TokenKind OpKind, SourceLocation MemberLoc,
383 IdentifierInfo &Member) {
384 Decl *MemberDecl = 0;
385 // TODO: Look up MemberDecl.
386 return new MemberExpr((Expr*)Base, OpKind == tok::arrow, MemberDecl);
387}
388
389/// ParseCallExpr - Handle a call to Fn with the specified array of arguments.
390/// This provides the location of the left/right parens and a list of comma
391/// locations.
392Action::ExprResult Sema::
393ParseCallExpr(ExprTy *Fn, SourceLocation LParenLoc,
394 ExprTy **Args, unsigned NumArgs,
395 SourceLocation *CommaLocs, SourceLocation RParenLoc) {
396 return new CallExpr((Expr*)Fn, (Expr**)Args, NumArgs);
397}
398
399Action::ExprResult Sema::
400ParseCastExpr(SourceLocation LParenLoc, TypeTy *Ty,
401 SourceLocation RParenLoc, ExprTy *Op) {
Chris Lattner0d8b1a12006-11-20 04:34:45 +0000402 // If error parsing type, ignore.
403 if (Ty == 0) return true;
404 return new CastExpr(TypeRef::getFromOpaquePtr(Ty), (Expr*)Op);
Chris Lattnere168f762006-11-10 05:29:30 +0000405}
406
407
408
409// Binary Operators. 'Tok' is the token for the operator.
410Action::ExprResult Sema::ParseBinOp(SourceLocation TokLoc, tok::TokenKind Kind,
411 ExprTy *LHS, ExprTy *RHS) {
412 BinaryOperator::Opcode Opc;
413 switch (Kind) {
414 default: assert(0 && "Unknown binop!");
415 case tok::star: Opc = BinaryOperator::Mul; break;
416 case tok::slash: Opc = BinaryOperator::Div; break;
417 case tok::percent: Opc = BinaryOperator::Rem; break;
418 case tok::plus: Opc = BinaryOperator::Add; break;
419 case tok::minus: Opc = BinaryOperator::Sub; break;
420 case tok::lessless: Opc = BinaryOperator::Shl; break;
421 case tok::greatergreater: Opc = BinaryOperator::Shr; break;
422 case tok::lessequal: Opc = BinaryOperator::LE; break;
423 case tok::less: Opc = BinaryOperator::LT; break;
424 case tok::greaterequal: Opc = BinaryOperator::GE; break;
425 case tok::greater: Opc = BinaryOperator::GT; break;
426 case tok::exclaimequal: Opc = BinaryOperator::NE; break;
427 case tok::equalequal: Opc = BinaryOperator::EQ; break;
428 case tok::amp: Opc = BinaryOperator::And; break;
429 case tok::caret: Opc = BinaryOperator::Xor; break;
430 case tok::pipe: Opc = BinaryOperator::Or; break;
431 case tok::ampamp: Opc = BinaryOperator::LAnd; break;
432 case tok::pipepipe: Opc = BinaryOperator::LOr; break;
433 case tok::equal: Opc = BinaryOperator::Assign; break;
434 case tok::starequal: Opc = BinaryOperator::MulAssign; break;
435 case tok::slashequal: Opc = BinaryOperator::DivAssign; break;
436 case tok::percentequal: Opc = BinaryOperator::RemAssign; break;
437 case tok::plusequal: Opc = BinaryOperator::AddAssign; break;
438 case tok::minusequal: Opc = BinaryOperator::SubAssign; break;
439 case tok::lesslessequal: Opc = BinaryOperator::ShlAssign; break;
440 case tok::greatergreaterequal: Opc = BinaryOperator::ShrAssign; break;
441 case tok::ampequal: Opc = BinaryOperator::AndAssign; break;
442 case tok::caretequal: Opc = BinaryOperator::XorAssign; break;
443 case tok::pipeequal: Opc = BinaryOperator::OrAssign; break;
444 case tok::comma: Opc = BinaryOperator::Comma; break;
445 }
446
447 return new BinaryOperator((Expr*)LHS, (Expr*)RHS, Opc);
448}
449
450/// ParseConditionalOp - Parse a ?: operation. Note that 'LHS' may be null
451/// in the case of a the GNU conditional expr extension.
452Action::ExprResult Sema::ParseConditionalOp(SourceLocation QuestionLoc,
453 SourceLocation ColonLoc,
454 ExprTy *Cond, ExprTy *LHS,
455 ExprTy *RHS) {
456 return new ConditionalOperator((Expr*)Cond, (Expr*)LHS, (Expr*)RHS);
457}
458