blob: b83e5120a1c73ba6d2f15164f671fc07699ea283 [file] [log] [blame]
//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Ted Kremenek and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements extra semantic analysis beyond what is enforced
//  by the C type system.
//
//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/Decl.h"
18#include "clang/AST/Expr.h"
Ted Kremenek1c1700f2007-08-20 16:18:38 +000019#include "clang/AST/ExprCXX.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000020#include "clang/Lex/Preprocessor.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/Diagnostic.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/TargetInfo.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/StringExtras.h"
Ted Kremenek30c66752007-11-25 00:58:00 +000028#include "SemaUtil.h"
29
Chris Lattner2e64c072007-08-10 20:18:51 +000030using namespace clang;
31
32/// CheckFunctionCall - Check a direct function call for various correctness
33/// and safety properties not strictly enforced by the C type system.
Anders Carlssone7e7aa22007-08-17 05:31:46 +000034bool
Ted Kremenek081ed872007-08-14 17:39:48 +000035Sema::CheckFunctionCall(Expr *Fn,
36 SourceLocation LParenLoc, SourceLocation RParenLoc,
37 FunctionDecl *FDecl,
Chris Lattner2e64c072007-08-10 20:18:51 +000038 Expr** Args, unsigned NumArgsInCall) {
39
40 // Get the IdentifierInfo* for the called function.
41 IdentifierInfo *FnInfo = FDecl->getIdentifier();
42
Anders Carlssone7e7aa22007-08-17 05:31:46 +000043 if (FnInfo->getBuiltinID() ==
44 Builtin::BI__builtin___CFStringMakeConstantString) {
45 assert(NumArgsInCall == 1 &&
Chris Lattnerd58c31c2007-08-30 17:08:17 +000046 "Wrong number of arguments to builtin CFStringMakeConstantString");
Anders Carlssone7e7aa22007-08-17 05:31:46 +000047 return CheckBuiltinCFStringArgument(Args[0]);
Anders Carlssone2674802007-10-12 17:48:41 +000048 } else if (FnInfo->getBuiltinID() == Builtin::BI__builtin_va_start) {
49 if (NumArgsInCall > 2) {
50 Diag(Args[2]->getLocStart(),
51 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
52 SourceRange(Args[2]->getLocStart(),
53 Args[NumArgsInCall - 1]->getLocEnd()));
54 return true;
55 }
56
Fariborz Jahanian336b2e82007-12-04 19:20:11 +000057 FunctionTypeProto* proto = CurFunctionDecl ?
58 cast<FunctionTypeProto>(CurFunctionDecl->getType()) :
59 cast<FunctionTypeProto>(ObjcGetTypeForMethodDefinition(CurMethodDecl));
Anders Carlssone2674802007-10-12 17:48:41 +000060 if (!proto->isVariadic()) {
61 Diag(Fn->getLocStart(),
62 diag::err_va_start_used_in_non_variadic_function);
63 return true;
64 }
65
66 bool SecondArgIsLastNamedArgument = false;
67 if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Args[1])) {
68 if (ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Fariborz Jahanian336b2e82007-12-04 19:20:11 +000069 ParmVarDecl *LastNamedArg = CurFunctionDecl ?
70 CurFunctionDecl->getParamDecl(CurFunctionDecl->getNumParams() - 1) :
71 CurMethodDecl->getParamDecl(CurMethodDecl->getNumParams() - 1);
72
Anders Carlssone2674802007-10-12 17:48:41 +000073 if (PV == LastNamedArg)
74 SecondArgIsLastNamedArgument = true;
75 }
76 }
77
78 if (!SecondArgIsLastNamedArgument)
79 Diag(Args[1]->getLocStart(),
80 diag::warn_second_parameter_of_va_start_not_last_named_argument);
Anders Carlssone7e7aa22007-08-17 05:31:46 +000081 }
82
Chris Lattner2e64c072007-08-10 20:18:51 +000083 // Search the KnownFunctionIDs for the identifier.
84 unsigned i = 0, e = id_num_known_functions;
Ted Kremenek081ed872007-08-14 17:39:48 +000085 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
Anders Carlsson3e9b43b2007-08-17 15:44:17 +000086 if (i == e) return false;
Chris Lattner2e64c072007-08-10 20:18:51 +000087
88 // Printf checking.
89 if (i <= id_vprintf) {
Ted Kremenek081ed872007-08-14 17:39:48 +000090 // Retrieve the index of the format string parameter and determine
91 // if the function is passed a va_arg argument.
Chris Lattner2e64c072007-08-10 20:18:51 +000092 unsigned format_idx = 0;
Ted Kremenek081ed872007-08-14 17:39:48 +000093 bool HasVAListArg = false;
94
Chris Lattner2e64c072007-08-10 20:18:51 +000095 switch (i) {
96 default: assert(false && "No format string argument index.");
97 case id_printf: format_idx = 0; break;
98 case id_fprintf: format_idx = 1; break;
99 case id_sprintf: format_idx = 1; break;
100 case id_snprintf: format_idx = 2; break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000101 case id_asprintf: format_idx = 1; HasVAListArg = true; break;
102 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
103 case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
104 case id_vfprintf: format_idx = 1; HasVAListArg = true; break;
105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break;
106 case id_vprintf: format_idx = 0; HasVAListArg = true; break;
107 }
108
109 CheckPrintfArguments(Fn, LParenLoc, RParenLoc, HasVAListArg,
Ted Kremenek45925ab2007-08-17 16:46:58 +0000110 FDecl, format_idx, Args, NumArgsInCall);
Chris Lattner2e64c072007-08-10 20:18:51 +0000111 }
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000112
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000113 return false;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000114}
115
116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
117/// CFString constructor is correct
Chris Lattnerda050402007-08-25 05:30:33 +0000118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
Chris Lattnere65acc12007-08-25 05:36:18 +0000119 // FIXME: This should go in a helper.
Chris Lattnerda050402007-08-25 05:30:33 +0000120 while (1) {
121 if (ParenExpr *PE = dyn_cast<ParenExpr>(Arg))
122 Arg = PE->getSubExpr();
123 else if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg))
124 Arg = ICE->getSubExpr();
125 else
126 break;
127 }
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000128
129 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
130
131 if (!Literal || Literal->isWide()) {
132 Diag(Arg->getLocStart(),
133 diag::err_cfstring_literal_not_string_constant,
134 Arg->getSourceRange());
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000135 return true;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000136 }
137
138 const char *Data = Literal->getStrData();
139 unsigned Length = Literal->getByteLength();
140
141 for (unsigned i = 0; i < Length; ++i) {
142 if (!isascii(Data[i])) {
143 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
144 diag::warn_cfstring_literal_contains_non_ascii_character,
145 Arg->getSourceRange());
146 break;
147 }
148
149 if (!Data[i]) {
150 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
151 diag::warn_cfstring_literal_contains_nul_character,
152 Arg->getSourceRange());
153 break;
154 }
155 }
156
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000157 return false;
Chris Lattner2e64c072007-08-10 20:18:51 +0000158}
159
160/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek081ed872007-08-14 17:39:48 +0000161/// correct use of format strings.
162///
163/// HasVAListArg - A predicate indicating whether the printf-like
164/// function is passed an explicit va_arg argument (e.g., vprintf)
165///
166/// format_idx - The index into Args for the format string.
167///
168/// Improper format strings to functions in the printf family can be
169/// the source of bizarre bugs and very serious security holes. A
170/// good source of information is available in the following paper
171/// (which includes additional references):
Chris Lattner2e64c072007-08-10 20:18:51 +0000172///
173/// FormatGuard: Automatic Protection From printf Format String
174/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek081ed872007-08-14 17:39:48 +0000175///
176/// Functionality implemented:
177///
178/// We can statically check the following properties for string
179/// literal format strings for non v.*printf functions (where the
180/// arguments are passed directly):
181//
182/// (1) Are the number of format conversions equal to the number of
183/// data arguments?
184///
185/// (2) Does each format conversion correctly match the type of the
186/// corresponding data argument? (TODO)
187///
188/// Moreover, for all printf functions we can:
189///
190/// (3) Check for a missing format string (when not caught by type checking).
191///
192/// (4) Check for no-operation flags; e.g. using "#" with format
193/// conversion 'c' (TODO)
194///
195/// (5) Check the use of '%n', a major source of security holes.
196///
197/// (6) Check for malformed format conversions that don't specify anything.
198///
199/// (7) Check for empty format strings. e.g: printf("");
200///
201/// (8) Check that the format string is a wide literal.
202///
203/// All of these checks can be done by parsing the format string.
204///
205/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner2e64c072007-08-10 20:18:51 +0000206void
Ted Kremenek081ed872007-08-14 17:39:48 +0000207Sema::CheckPrintfArguments(Expr *Fn,
208 SourceLocation LParenLoc, SourceLocation RParenLoc,
209 bool HasVAListArg, FunctionDecl *FDecl,
Ted Kremenek30596542007-08-10 21:21:05 +0000210 unsigned format_idx, Expr** Args,
211 unsigned NumArgsInCall) {
Ted Kremenek081ed872007-08-14 17:39:48 +0000212 // CHECK: printf-like function is called with no format string.
213 if (format_idx >= NumArgsInCall) {
214 Diag(RParenLoc, diag::warn_printf_missing_format_string,
215 Fn->getSourceRange());
216 return;
217 }
218
Chris Lattnere65acc12007-08-25 05:36:18 +0000219 Expr *OrigFormatExpr = Args[format_idx];
220 // FIXME: This should go in a helper.
221 while (1) {
222 if (ParenExpr *PE = dyn_cast<ParenExpr>(OrigFormatExpr))
223 OrigFormatExpr = PE->getSubExpr();
224 else if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(OrigFormatExpr))
225 OrigFormatExpr = ICE->getSubExpr();
226 else
227 break;
228 }
229
Chris Lattner2e64c072007-08-10 20:18:51 +0000230 // CHECK: format string is not a string literal.
231 //
Ted Kremenek081ed872007-08-14 17:39:48 +0000232 // Dynamically generated format strings are difficult to
233 // automatically vet at compile time. Requiring that format strings
234 // are string literals: (1) permits the checking of format strings by
235 // the compiler and thereby (2) can practically remove the source of
236 // many format string exploits.
Chris Lattnere65acc12007-08-25 05:36:18 +0000237 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
Chris Lattner2e64c072007-08-10 20:18:51 +0000238
Ted Kremenek081ed872007-08-14 17:39:48 +0000239 if (FExpr == NULL) {
240 Diag(Args[format_idx]->getLocStart(),
241 diag::warn_printf_not_string_constant, Fn->getSourceRange());
242 return;
243 }
244
245 // CHECK: is the format string a wide literal?
246 if (FExpr->isWide()) {
247 Diag(Args[format_idx]->getLocStart(),
248 diag::warn_printf_format_string_is_wide_literal,
249 Fn->getSourceRange());
250 return;
251 }
252
253 // Str - The format string. NOTE: this is NOT null-terminated!
254 const char * const Str = FExpr->getStrData();
255
256 // CHECK: empty format string?
257 const unsigned StrLen = FExpr->getByteLength();
258
259 if (StrLen == 0) {
260 Diag(Args[format_idx]->getLocStart(),
261 diag::warn_printf_empty_format_string, Fn->getSourceRange());
262 return;
263 }
264
265 // We process the format string using a binary state machine. The
266 // current state is stored in CurrentState.
267 enum {
268 state_OrdChr,
269 state_Conversion
270 } CurrentState = state_OrdChr;
271
272 // numConversions - The number of conversions seen so far. This is
273 // incremented as we traverse the format string.
274 unsigned numConversions = 0;
275
276 // numDataArgs - The number of data arguments after the format
277 // string. This can only be determined for non vprintf-like
278 // functions. For those functions, this value is 1 (the sole
279 // va_arg argument).
280 unsigned numDataArgs = NumArgsInCall-(format_idx+1);
281
282 // Inspect the format string.
283 unsigned StrIdx = 0;
284
285 // LastConversionIdx - Index within the format string where we last saw
286 // a '%' character that starts a new format conversion.
287 unsigned LastConversionIdx = 0;
288
289 for ( ; StrIdx < StrLen ; ++StrIdx ) {
290
291 // Is the number of detected conversion conversions greater than
292 // the number of matching data arguments? If so, stop.
293 if (!HasVAListArg && numConversions > numDataArgs) break;
294
295 // Handle "\0"
296 if(Str[StrIdx] == '\0' ) {
297 // The string returned by getStrData() is not null-terminated,
298 // so the presence of a null character is likely an error.
299
300 SourceLocation Loc =
301 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),StrIdx+1);
302
303 Diag(Loc, diag::warn_printf_format_string_contains_null_char,
304 Fn->getSourceRange());
305
306 return;
307 }
308
309 // Ordinary characters (not processing a format conversion).
310 if (CurrentState == state_OrdChr) {
311 if (Str[StrIdx] == '%') {
312 CurrentState = state_Conversion;
313 LastConversionIdx = StrIdx;
314 }
315 continue;
316 }
317
318 // Seen '%'. Now processing a format conversion.
319 switch (Str[StrIdx]) {
Ted Kremenek035d8792007-10-12 20:51:52 +0000320 // Handle dynamic precision or width specifier.
321 case '*': {
322 ++numConversions;
323
324 if (!HasVAListArg && numConversions > numDataArgs) {
325
326 SourceLocation Loc =
327 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
328 StrIdx+1);
329
330 if (Str[StrIdx-1] == '.')
331 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
332 Fn->getSourceRange());
333 else
334 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
335 Fn->getSourceRange());
336
337 // Don't do any more checking. We'll just emit spurious errors.
338 return;
339 }
340
341 // Perform type checking on width/precision specifier.
342 Expr* E = Args[format_idx+numConversions];
343 QualType T = E->getType().getCanonicalType();
344 if (BuiltinType *BT = dyn_cast<BuiltinType>(T))
345 if (BT->getKind() == BuiltinType::Int)
346 break;
347
348 SourceLocation Loc =
349 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
350 StrIdx+1);
351
352 if (Str[StrIdx-1] == '.')
353 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
354 T.getAsString(), E->getSourceRange());
355 else
356 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
357 T.getAsString(), E->getSourceRange());
358
Ted Kremenek42166a82007-10-12 00:11:27 +0000359 break;
Ted Kremenek035d8792007-10-12 20:51:52 +0000360 }
Ted Kremenek42166a82007-10-12 00:11:27 +0000361
Ted Kremenek081ed872007-08-14 17:39:48 +0000362 // Characters which can terminate a format conversion
363 // (e.g. "%d"). Characters that specify length modifiers or
364 // other flags are handled by the default case below.
365 //
Ted Kremenek42166a82007-10-12 00:11:27 +0000366 // FIXME: additional checks will go into the following cases.
Ted Kremenek081ed872007-08-14 17:39:48 +0000367 case 'i':
368 case 'd':
369 case 'o':
370 case 'u':
371 case 'x':
372 case 'X':
373 case 'D':
374 case 'O':
375 case 'U':
376 case 'e':
377 case 'E':
378 case 'f':
379 case 'F':
380 case 'g':
381 case 'G':
382 case 'a':
383 case 'A':
384 case 'c':
385 case 'C':
386 case 'S':
387 case 's':
Chris Lattner04e04642007-08-26 17:39:38 +0000388 case 'p':
Ted Kremenek081ed872007-08-14 17:39:48 +0000389 ++numConversions;
390 CurrentState = state_OrdChr;
391 break;
392
393 // CHECK: Are we using "%n"? Issue a warning.
394 case 'n': {
395 ++numConversions;
396 CurrentState = state_OrdChr;
397 SourceLocation Loc =
398 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
399 LastConversionIdx+1);
400
401 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
402 break;
403 }
404
405 // Handle "%%"
406 case '%':
407 // Sanity check: Was the first "%" character the previous one?
408 // If not, we will assume that we have a malformed format
409 // conversion, and that the current "%" character is the start
410 // of a new conversion.
411 if (StrIdx - LastConversionIdx == 1)
412 CurrentState = state_OrdChr;
413 else {
414 // Issue a warning: invalid format conversion.
415 SourceLocation Loc =
416 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
417 LastConversionIdx+1);
418
419 Diag(Loc, diag::warn_printf_invalid_conversion,
Ted Kremenek035d8792007-10-12 20:51:52 +0000420 std::string(Str+LastConversionIdx, Str+StrIdx),
Ted Kremenek081ed872007-08-14 17:39:48 +0000421 Fn->getSourceRange());
422
423 // This conversion is broken. Advance to the next format
424 // conversion.
425 LastConversionIdx = StrIdx;
426 ++numConversions;
427 }
428
429 break;
430
431 default:
432 // This case catches all other characters: flags, widths, etc.
433 // We should eventually process those as well.
434 break;
435 }
436 }
437
438 if (CurrentState == state_Conversion) {
439 // Issue a warning: invalid format conversion.
440 SourceLocation Loc =
441 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
442 LastConversionIdx+1);
443
444 Diag(Loc, diag::warn_printf_invalid_conversion,
Chris Lattner6f65d202007-08-26 17:38:22 +0000445 std::string(Str+LastConversionIdx,
446 Str+std::min(LastConversionIdx+2, StrLen)),
Ted Kremenek081ed872007-08-14 17:39:48 +0000447 Fn->getSourceRange());
448 return;
449 }
450
451 if (!HasVAListArg) {
452 // CHECK: Does the number of format conversions exceed the number
453 // of data arguments?
454 if (numConversions > numDataArgs) {
455 SourceLocation Loc =
456 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
457 LastConversionIdx);
458
459 Diag(Loc, diag::warn_printf_insufficient_data_args,
460 Fn->getSourceRange());
461 }
462 // CHECK: Does the number of data arguments exceed the number of
463 // format conversions in the format string?
464 else if (numConversions < numDataArgs)
465 Diag(Args[format_idx+numConversions+1]->getLocStart(),
466 diag::warn_printf_too_many_data_args, Fn->getSourceRange());
467 }
468}
Ted Kremenek45925ab2007-08-17 16:46:58 +0000469
470//===--- CHECK: Return Address of Stack Variable --------------------------===//
471
472static DeclRefExpr* EvalVal(Expr *E);
473static DeclRefExpr* EvalAddr(Expr* E);
474
475/// CheckReturnStackAddr - Check if a return statement returns the address
476/// of a stack variable.
477void
478Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
479 SourceLocation ReturnLoc) {
480
481 // Perform checking for returned stack addresses.
482 if (lhsType->isPointerType()) {
483 if (DeclRefExpr *DR = EvalAddr(RetValExp))
484 Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
485 DR->getDecl()->getIdentifier()->getName(),
486 RetValExp->getSourceRange());
487 }
488 // Perform checking for stack values returned by reference.
489 else if (lhsType->isReferenceType()) {
Ted Kremenek1456f202007-08-27 16:39:17 +0000490 // Check for an implicit cast to a reference.
491 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
492 if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
493 Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
494 DR->getDecl()->getIdentifier()->getName(),
495 RetValExp->getSourceRange());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000496 }
497}
498
499/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
500/// check if the expression in a return statement evaluates to an address
501/// to a location on the stack. The recursion is used to traverse the
502/// AST of the return expression, with recursion backtracking when we
503/// encounter a subexpression that (1) clearly does not lead to the address
504/// of a stack variable or (2) is something we cannot determine leads to
505/// the address of a stack variable based on such local checking.
506///
Ted Kremenekda1300a2007-08-28 17:02:55 +0000507/// EvalAddr processes expressions that are pointers that are used as
508/// references (and not L-values). EvalVal handles all other values.
Ted Kremenek45925ab2007-08-17 16:46:58 +0000509/// At the base case of the recursion is a check for a DeclRefExpr* in
510/// the refers to a stack variable.
511///
512/// This implementation handles:
513///
514/// * pointer-to-pointer casts
515/// * implicit conversions from array references to pointers
516/// * taking the address of fields
517/// * arbitrary interplay between "&" and "*" operators
518/// * pointer arithmetic from an address of a stack variable
519/// * taking the address of an array element where the array is on the stack
520static DeclRefExpr* EvalAddr(Expr *E) {
521
522 // We should only be called for evaluating pointer expressions.
523 assert (E->getType()->isPointerType() && "EvalAddr only works on pointers");
524
525 // Our "symbolic interpreter" is just a dispatch off the currently
526 // viewed AST node. We then recursively traverse the AST by calling
527 // EvalAddr and EvalVal appropriately.
528 switch (E->getStmtClass()) {
529
530 case Stmt::ParenExprClass:
531 // Ignore parentheses.
532 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
533
534 case Stmt::UnaryOperatorClass: {
535 // The only unary operator that make sense to handle here
536 // is AddrOf. All others don't make sense as pointers.
537 UnaryOperator *U = cast<UnaryOperator>(E);
538
539 if (U->getOpcode() == UnaryOperator::AddrOf)
540 return EvalVal(U->getSubExpr());
541 else
542 return NULL;
543 }
544
545 case Stmt::BinaryOperatorClass: {
546 // Handle pointer arithmetic. All other binary operators are not valid
547 // in this context.
548 BinaryOperator *B = cast<BinaryOperator>(E);
549 BinaryOperator::Opcode op = B->getOpcode();
550
551 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
552 return NULL;
553
554 Expr *Base = B->getLHS();
555
556 // Determine which argument is the real pointer base. It could be
557 // the RHS argument instead of the LHS.
558 if (!Base->getType()->isPointerType()) Base = B->getRHS();
559
560 assert (Base->getType()->isPointerType());
561 return EvalAddr(Base);
562 }
563
564 // For conditional operators we need to see if either the LHS or RHS are
565 // valid DeclRefExpr*s. If one of them is valid, we return it.
566 case Stmt::ConditionalOperatorClass: {
567 ConditionalOperator *C = cast<ConditionalOperator>(E);
568
Anders Carlsson37365fc2007-11-30 19:04:31 +0000569 // Handle the GNU extension for missing LHS.
570 if (Expr *lhsExpr = C->getLHS())
571 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
572 return LHS;
573
574 return EvalAddr(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000575 }
576
577 // For implicit casts, we need to handle conversions from arrays to
578 // pointer values, and implicit pointer-to-pointer conversions.
579 case Stmt::ImplicitCastExprClass: {
580 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
581 Expr* SubExpr = IE->getSubExpr();
582
583 if (SubExpr->getType()->isPointerType())
584 return EvalAddr(SubExpr);
585 else
586 return EvalVal(SubExpr);
587 }
588
589 // For casts, we handle pointer-to-pointer conversions (which
590 // is essentially a no-op from our mini-interpreter's standpoint).
591 // For other casts we abort.
592 case Stmt::CastExprClass: {
593 CastExpr *C = cast<CastExpr>(E);
594 Expr *SubExpr = C->getSubExpr();
595
596 if (SubExpr->getType()->isPointerType())
597 return EvalAddr(SubExpr);
598 else
599 return NULL;
600 }
601
Ted Kremenek1c1700f2007-08-20 16:18:38 +0000602 // C++ casts. For dynamic casts, static casts, and const casts, we
603 // are always converting from a pointer-to-pointer, so we just blow
604 // through the cast. In the case the dynamic cast doesn't fail
605 // (and return NULL), we take the conservative route and report cases
606 // where we return the address of a stack variable. For Reinterpre
607 case Stmt::CXXCastExprClass: {
608 CXXCastExpr *C = cast<CXXCastExpr>(E);
609
610 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
611 Expr *S = C->getSubExpr();
612 if (S->getType()->isPointerType())
613 return EvalAddr(S);
614 else
615 return NULL;
616 }
617 else
618 return EvalAddr(C->getSubExpr());
619 }
Ted Kremenek45925ab2007-08-17 16:46:58 +0000620
621 // Everything else: we simply don't reason about them.
622 default:
623 return NULL;
624 }
625}
626
627
628/// EvalVal - This function is complements EvalAddr in the mutual recursion.
629/// See the comments for EvalAddr for more details.
630static DeclRefExpr* EvalVal(Expr *E) {
631
Ted Kremenekda1300a2007-08-28 17:02:55 +0000632 // We should only be called for evaluating non-pointer expressions, or
633 // expressions with a pointer type that are not used as references but instead
634 // are l-values (e.g., DeclRefExpr with a pointer type).
635
Ted Kremenek45925ab2007-08-17 16:46:58 +0000636 // Our "symbolic interpreter" is just a dispatch off the currently
637 // viewed AST node. We then recursively traverse the AST by calling
638 // EvalAddr and EvalVal appropriately.
639 switch (E->getStmtClass()) {
640
641 case Stmt::DeclRefExprClass: {
642 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
643 // at code that refers to a variable's name. We check if it has local
644 // storage within the function, and if so, return the expression.
645 DeclRefExpr *DR = cast<DeclRefExpr>(E);
646
647 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
648 if(V->hasLocalStorage()) return DR;
649
650 return NULL;
651 }
652
653 case Stmt::ParenExprClass:
654 // Ignore parentheses.
655 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
656
657 case Stmt::UnaryOperatorClass: {
658 // The only unary operator that make sense to handle here
659 // is Deref. All others don't resolve to a "name." This includes
660 // handling all sorts of rvalues passed to a unary operator.
661 UnaryOperator *U = cast<UnaryOperator>(E);
662
663 if (U->getOpcode() == UnaryOperator::Deref)
664 return EvalAddr(U->getSubExpr());
665
666 return NULL;
667 }
668
669 case Stmt::ArraySubscriptExprClass: {
670 // Array subscripts are potential references to data on the stack. We
671 // retrieve the DeclRefExpr* for the array variable if it indeed
672 // has local storage.
Ted Kremenek1c1700f2007-08-20 16:18:38 +0000673 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000674 }
675
676 case Stmt::ConditionalOperatorClass: {
677 // For conditional operators we need to see if either the LHS or RHS are
678 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
679 ConditionalOperator *C = cast<ConditionalOperator>(E);
680
Anders Carlsson37365fc2007-11-30 19:04:31 +0000681 // Handle the GNU extension for missing LHS.
682 if (Expr *lhsExpr = C->getLHS())
683 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
684 return LHS;
685
686 return EvalVal(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000687 }
688
689 // Accesses to members are potential references to data on the stack.
690 case Stmt::MemberExprClass: {
691 MemberExpr *M = cast<MemberExpr>(E);
692
693 // Check for indirect access. We only want direct field accesses.
694 if (!M->isArrow())
695 return EvalVal(M->getBase());
696 else
697 return NULL;
698 }
699
700 // Everything else: we simply don't reason about them.
701 default:
702 return NULL;
703 }
704}
Ted Kremenek30c66752007-11-25 00:58:00 +0000705
706//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
707
708/// Check for comparisons of floating point operands using != and ==.
709/// Issue a warning if these are no self-comparisons, as they are not likely
710/// to do what the programmer intended.
711void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
712 bool EmitWarning = true;
713
714 Expr* LeftExprSansParen = IgnoreParen(lex);
715 Expr* RightExprSansParen = IgnoreParen(rex);
716
717 // Special case: check for x == x (which is OK).
718 // Do not emit warnings for such cases.
719 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
720 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
721 if (DRL->getDecl() == DRR->getDecl())
722 EmitWarning = false;
723
Ted Kremenek33159832007-11-29 00:59:04 +0000724
725 // Special case: check for comparisons against literals that can be exactly
726 // represented by APFloat. In such cases, do not emit a warning. This
727 // is a heuristic: often comparison against such literals are used to
728 // detect if a value in a variable has not changed. This clearly can
729 // lead to false negatives.
730 if (EmitWarning) {
731 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
732 if (FLL->isExact())
733 EmitWarning = false;
734 }
735 else
736 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
737 if (FLR->isExact())
738 EmitWarning = false;
739 }
740 }
741
Ted Kremenek30c66752007-11-25 00:58:00 +0000742 // Check for comparisons with builtin types.
743 if (EmitWarning)
744 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
745 if (isCallBuiltin(CL))
746 EmitWarning = false;
747
748 if (EmitWarning)
749 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
750 if (isCallBuiltin(CR))
751 EmitWarning = false;
752
753 // Emit the diagnostic.
754 if (EmitWarning)
755 Diag(loc, diag::warn_floatingpoint_eq,
756 lex->getSourceRange(),rex->getSourceRange());
757}