blob: 4d2e362038c829d323fd8e39681cc55a0bec937a [file] [log] [blame]
Chris Lattner2e64c072007-08-10 20:18:51 +00001//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Ted Kremenek and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements extra semantic analysis beyond what is enforced
11// by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/Decl.h"
18#include "clang/AST/Expr.h"
Ted Kremenek1c1700f2007-08-20 16:18:38 +000019#include "clang/AST/ExprCXX.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000020#include "clang/Lex/Preprocessor.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/Diagnostic.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/TargetInfo.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/StringExtras.h"
Ted Kremenek30c66752007-11-25 00:58:00 +000028#include "SemaUtil.h"
29
Chris Lattner2e64c072007-08-10 20:18:51 +000030using namespace clang;
31
32/// CheckFunctionCall - Check a direct function call for various correctness
33/// and safety properties not strictly enforced by the C type system.
Anders Carlssone7e7aa22007-08-17 05:31:46 +000034bool
Ted Kremenek081ed872007-08-14 17:39:48 +000035Sema::CheckFunctionCall(Expr *Fn,
36 SourceLocation LParenLoc, SourceLocation RParenLoc,
37 FunctionDecl *FDecl,
Chris Lattner2e64c072007-08-10 20:18:51 +000038 Expr** Args, unsigned NumArgsInCall) {
39
40 // Get the IdentifierInfo* for the called function.
41 IdentifierInfo *FnInfo = FDecl->getIdentifier();
42
Anders Carlssone7e7aa22007-08-17 05:31:46 +000043 if (FnInfo->getBuiltinID() ==
44 Builtin::BI__builtin___CFStringMakeConstantString) {
45 assert(NumArgsInCall == 1 &&
Chris Lattnerd58c31c2007-08-30 17:08:17 +000046 "Wrong number of arguments to builtin CFStringMakeConstantString");
Anders Carlssone7e7aa22007-08-17 05:31:46 +000047 return CheckBuiltinCFStringArgument(Args[0]);
Anders Carlssone2674802007-10-12 17:48:41 +000048 } else if (FnInfo->getBuiltinID() == Builtin::BI__builtin_va_start) {
49 if (NumArgsInCall > 2) {
50 Diag(Args[2]->getLocStart(),
51 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
52 SourceRange(Args[2]->getLocStart(),
53 Args[NumArgsInCall - 1]->getLocEnd()));
54 return true;
55 }
56
Fariborz Jahanian336b2e82007-12-04 19:20:11 +000057 FunctionTypeProto* proto = CurFunctionDecl ?
58 cast<FunctionTypeProto>(CurFunctionDecl->getType()) :
59 cast<FunctionTypeProto>(ObjcGetTypeForMethodDefinition(CurMethodDecl));
Anders Carlssone2674802007-10-12 17:48:41 +000060 if (!proto->isVariadic()) {
61 Diag(Fn->getLocStart(),
62 diag::err_va_start_used_in_non_variadic_function);
63 return true;
64 }
Steve Naroffc1a88c12007-12-18 03:41:15 +000065 // FIXME: This isn't correct for methods (results in bogus warning).
Anders Carlssone2674802007-10-12 17:48:41 +000066 bool SecondArgIsLastNamedArgument = false;
67 if (DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Args[1])) {
68 if (ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Fariborz Jahanian336b2e82007-12-04 19:20:11 +000069 ParmVarDecl *LastNamedArg = CurFunctionDecl ?
70 CurFunctionDecl->getParamDecl(CurFunctionDecl->getNumParams() - 1) :
71 CurMethodDecl->getParamDecl(CurMethodDecl->getNumParams() - 1);
72
Anders Carlssone2674802007-10-12 17:48:41 +000073 if (PV == LastNamedArg)
74 SecondArgIsLastNamedArgument = true;
75 }
76 }
77
78 if (!SecondArgIsLastNamedArgument)
79 Diag(Args[1]->getLocStart(),
80 diag::warn_second_parameter_of_va_start_not_last_named_argument);
Anders Carlssone7e7aa22007-08-17 05:31:46 +000081 }
82
Chris Lattner2e64c072007-08-10 20:18:51 +000083 // Search the KnownFunctionIDs for the identifier.
84 unsigned i = 0, e = id_num_known_functions;
Ted Kremenek081ed872007-08-14 17:39:48 +000085 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
Anders Carlsson3e9b43b2007-08-17 15:44:17 +000086 if (i == e) return false;
Chris Lattner2e64c072007-08-10 20:18:51 +000087
88 // Printf checking.
89 if (i <= id_vprintf) {
Ted Kremenek081ed872007-08-14 17:39:48 +000090 // Retrieve the index of the format string parameter and determine
91 // if the function is passed a va_arg argument.
Chris Lattner2e64c072007-08-10 20:18:51 +000092 unsigned format_idx = 0;
Ted Kremenek081ed872007-08-14 17:39:48 +000093 bool HasVAListArg = false;
94
Chris Lattner2e64c072007-08-10 20:18:51 +000095 switch (i) {
96 default: assert(false && "No format string argument index.");
97 case id_printf: format_idx = 0; break;
98 case id_fprintf: format_idx = 1; break;
99 case id_sprintf: format_idx = 1; break;
100 case id_snprintf: format_idx = 2; break;
Ted Kremenek109118e2007-12-17 18:38:31 +0000101 case id_asprintf: format_idx = 1; break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000102 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
103 case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
104 case id_vfprintf: format_idx = 1; HasVAListArg = true; break;
105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break;
106 case id_vprintf: format_idx = 0; HasVAListArg = true; break;
107 }
108
109 CheckPrintfArguments(Fn, LParenLoc, RParenLoc, HasVAListArg,
Ted Kremenek45925ab2007-08-17 16:46:58 +0000110 FDecl, format_idx, Args, NumArgsInCall);
Chris Lattner2e64c072007-08-10 20:18:51 +0000111 }
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000112
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000113 return false;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000114}
115
116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
117/// CFString constructor is correct
Chris Lattnerda050402007-08-25 05:30:33 +0000118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
Chris Lattnere65acc12007-08-25 05:36:18 +0000119 // FIXME: This should go in a helper.
Chris Lattnerda050402007-08-25 05:30:33 +0000120 while (1) {
121 if (ParenExpr *PE = dyn_cast<ParenExpr>(Arg))
122 Arg = PE->getSubExpr();
123 else if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg))
124 Arg = ICE->getSubExpr();
125 else
126 break;
127 }
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000128
129 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
130
131 if (!Literal || Literal->isWide()) {
132 Diag(Arg->getLocStart(),
133 diag::err_cfstring_literal_not_string_constant,
134 Arg->getSourceRange());
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000135 return true;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000136 }
137
138 const char *Data = Literal->getStrData();
139 unsigned Length = Literal->getByteLength();
140
141 for (unsigned i = 0; i < Length; ++i) {
142 if (!isascii(Data[i])) {
143 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
144 diag::warn_cfstring_literal_contains_non_ascii_character,
145 Arg->getSourceRange());
146 break;
147 }
148
149 if (!Data[i]) {
150 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
151 diag::warn_cfstring_literal_contains_nul_character,
152 Arg->getSourceRange());
153 break;
154 }
155 }
156
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000157 return false;
Chris Lattner2e64c072007-08-10 20:18:51 +0000158}
159
160/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek081ed872007-08-14 17:39:48 +0000161/// correct use of format strings.
162///
163/// HasVAListArg - A predicate indicating whether the printf-like
164/// function is passed an explicit va_arg argument (e.g., vprintf)
165///
166/// format_idx - The index into Args for the format string.
167///
168/// Improper format strings to functions in the printf family can be
169/// the source of bizarre bugs and very serious security holes. A
170/// good source of information is available in the following paper
171/// (which includes additional references):
Chris Lattner2e64c072007-08-10 20:18:51 +0000172///
173/// FormatGuard: Automatic Protection From printf Format String
174/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek081ed872007-08-14 17:39:48 +0000175///
176/// Functionality implemented:
177///
178/// We can statically check the following properties for string
179/// literal format strings for non v.*printf functions (where the
180/// arguments are passed directly):
181//
182/// (1) Are the number of format conversions equal to the number of
183/// data arguments?
184///
185/// (2) Does each format conversion correctly match the type of the
186/// corresponding data argument? (TODO)
187///
188/// Moreover, for all printf functions we can:
189///
190/// (3) Check for a missing format string (when not caught by type checking).
191///
192/// (4) Check for no-operation flags; e.g. using "#" with format
193/// conversion 'c' (TODO)
194///
195/// (5) Check the use of '%n', a major source of security holes.
196///
197/// (6) Check for malformed format conversions that don't specify anything.
198///
199/// (7) Check for empty format strings. e.g: printf("");
200///
201/// (8) Check that the format string is a wide literal.
202///
203/// All of these checks can be done by parsing the format string.
204///
205/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner2e64c072007-08-10 20:18:51 +0000206void
Ted Kremenek081ed872007-08-14 17:39:48 +0000207Sema::CheckPrintfArguments(Expr *Fn,
208 SourceLocation LParenLoc, SourceLocation RParenLoc,
209 bool HasVAListArg, FunctionDecl *FDecl,
Ted Kremenek30596542007-08-10 21:21:05 +0000210 unsigned format_idx, Expr** Args,
211 unsigned NumArgsInCall) {
Ted Kremenek081ed872007-08-14 17:39:48 +0000212 // CHECK: printf-like function is called with no format string.
213 if (format_idx >= NumArgsInCall) {
214 Diag(RParenLoc, diag::warn_printf_missing_format_string,
215 Fn->getSourceRange());
216 return;
217 }
218
Chris Lattnere65acc12007-08-25 05:36:18 +0000219 Expr *OrigFormatExpr = Args[format_idx];
220 // FIXME: This should go in a helper.
221 while (1) {
222 if (ParenExpr *PE = dyn_cast<ParenExpr>(OrigFormatExpr))
223 OrigFormatExpr = PE->getSubExpr();
224 else if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(OrigFormatExpr))
225 OrigFormatExpr = ICE->getSubExpr();
226 else
227 break;
228 }
229
Chris Lattner2e64c072007-08-10 20:18:51 +0000230 // CHECK: format string is not a string literal.
231 //
Ted Kremenek081ed872007-08-14 17:39:48 +0000232 // Dynamically generated format strings are difficult to
233 // automatically vet at compile time. Requiring that format strings
234 // are string literals: (1) permits the checking of format strings by
235 // the compiler and thereby (2) can practically remove the source of
236 // many format string exploits.
Chris Lattnere65acc12007-08-25 05:36:18 +0000237 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
Chris Lattner2e64c072007-08-10 20:18:51 +0000238
Ted Kremenek081ed872007-08-14 17:39:48 +0000239 if (FExpr == NULL) {
Ted Kremenek19398b62007-12-17 19:03:13 +0000240 // For vprintf* functions (i.e., HasVAListArg==true), we add a
241 // special check to see if the format string is a function parameter
242 // of the function calling the printf function. If the function
243 // has an attribute indicating it is a printf-like function, then we
244 // should suppress warnings concerning non-literals being used in a call
245 // to a vprintf function. For example:
246 //
247 // void
248 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
249 // va_list ap;
250 // va_start(ap, fmt);
251 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
252 // ...
253 //
254 //
255 // FIXME: We don't have full attribute support yet, so just check to see
256 // if the argument is a DeclRefExpr that references a parameter. We'll
257 // add proper support for checking the attribute later.
258 if (HasVAListArg)
259 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(IgnoreParen(OrigFormatExpr)))
260 if (isa<ParmVarDecl>(DR->getDecl()))
261 return;
262
263 Diag(Args[format_idx]->getLocStart(), diag::warn_printf_not_string_constant,
264 Fn->getSourceRange());
265
Ted Kremenek081ed872007-08-14 17:39:48 +0000266 return;
267 }
268
269 // CHECK: is the format string a wide literal?
270 if (FExpr->isWide()) {
271 Diag(Args[format_idx]->getLocStart(),
272 diag::warn_printf_format_string_is_wide_literal,
273 Fn->getSourceRange());
274 return;
275 }
276
277 // Str - The format string. NOTE: this is NOT null-terminated!
278 const char * const Str = FExpr->getStrData();
279
280 // CHECK: empty format string?
281 const unsigned StrLen = FExpr->getByteLength();
282
283 if (StrLen == 0) {
284 Diag(Args[format_idx]->getLocStart(),
285 diag::warn_printf_empty_format_string, Fn->getSourceRange());
286 return;
287 }
288
289 // We process the format string using a binary state machine. The
290 // current state is stored in CurrentState.
291 enum {
292 state_OrdChr,
293 state_Conversion
294 } CurrentState = state_OrdChr;
295
296 // numConversions - The number of conversions seen so far. This is
297 // incremented as we traverse the format string.
298 unsigned numConversions = 0;
299
300 // numDataArgs - The number of data arguments after the format
301 // string. This can only be determined for non vprintf-like
302 // functions. For those functions, this value is 1 (the sole
303 // va_arg argument).
304 unsigned numDataArgs = NumArgsInCall-(format_idx+1);
305
306 // Inspect the format string.
307 unsigned StrIdx = 0;
308
309 // LastConversionIdx - Index within the format string where we last saw
310 // a '%' character that starts a new format conversion.
311 unsigned LastConversionIdx = 0;
312
313 for ( ; StrIdx < StrLen ; ++StrIdx ) {
314
315 // Is the number of detected conversion conversions greater than
316 // the number of matching data arguments? If so, stop.
317 if (!HasVAListArg && numConversions > numDataArgs) break;
318
319 // Handle "\0"
320 if(Str[StrIdx] == '\0' ) {
321 // The string returned by getStrData() is not null-terminated,
322 // so the presence of a null character is likely an error.
323
324 SourceLocation Loc =
325 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),StrIdx+1);
326
327 Diag(Loc, diag::warn_printf_format_string_contains_null_char,
328 Fn->getSourceRange());
329
330 return;
331 }
332
333 // Ordinary characters (not processing a format conversion).
334 if (CurrentState == state_OrdChr) {
335 if (Str[StrIdx] == '%') {
336 CurrentState = state_Conversion;
337 LastConversionIdx = StrIdx;
338 }
339 continue;
340 }
341
342 // Seen '%'. Now processing a format conversion.
343 switch (Str[StrIdx]) {
Ted Kremenek035d8792007-10-12 20:51:52 +0000344 // Handle dynamic precision or width specifier.
345 case '*': {
346 ++numConversions;
347
348 if (!HasVAListArg && numConversions > numDataArgs) {
349
350 SourceLocation Loc =
351 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
352 StrIdx+1);
353
354 if (Str[StrIdx-1] == '.')
355 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
356 Fn->getSourceRange());
357 else
358 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
359 Fn->getSourceRange());
360
361 // Don't do any more checking. We'll just emit spurious errors.
362 return;
363 }
364
365 // Perform type checking on width/precision specifier.
366 Expr* E = Args[format_idx+numConversions];
367 QualType T = E->getType().getCanonicalType();
368 if (BuiltinType *BT = dyn_cast<BuiltinType>(T))
369 if (BT->getKind() == BuiltinType::Int)
370 break;
371
372 SourceLocation Loc =
373 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
374 StrIdx+1);
375
376 if (Str[StrIdx-1] == '.')
377 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
378 T.getAsString(), E->getSourceRange());
379 else
380 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
381 T.getAsString(), E->getSourceRange());
382
Ted Kremenek42166a82007-10-12 00:11:27 +0000383 break;
Ted Kremenek035d8792007-10-12 20:51:52 +0000384 }
Ted Kremenek42166a82007-10-12 00:11:27 +0000385
Ted Kremenek081ed872007-08-14 17:39:48 +0000386 // Characters which can terminate a format conversion
387 // (e.g. "%d"). Characters that specify length modifiers or
388 // other flags are handled by the default case below.
389 //
Ted Kremenek42166a82007-10-12 00:11:27 +0000390 // FIXME: additional checks will go into the following cases.
Ted Kremenek081ed872007-08-14 17:39:48 +0000391 case 'i':
392 case 'd':
393 case 'o':
394 case 'u':
395 case 'x':
396 case 'X':
397 case 'D':
398 case 'O':
399 case 'U':
400 case 'e':
401 case 'E':
402 case 'f':
403 case 'F':
404 case 'g':
405 case 'G':
406 case 'a':
407 case 'A':
408 case 'c':
409 case 'C':
410 case 'S':
411 case 's':
Chris Lattner04e04642007-08-26 17:39:38 +0000412 case 'p':
Ted Kremenek081ed872007-08-14 17:39:48 +0000413 ++numConversions;
414 CurrentState = state_OrdChr;
415 break;
416
417 // CHECK: Are we using "%n"? Issue a warning.
418 case 'n': {
419 ++numConversions;
420 CurrentState = state_OrdChr;
421 SourceLocation Loc =
422 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
423 LastConversionIdx+1);
424
425 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
426 break;
427 }
428
429 // Handle "%%"
430 case '%':
431 // Sanity check: Was the first "%" character the previous one?
432 // If not, we will assume that we have a malformed format
433 // conversion, and that the current "%" character is the start
434 // of a new conversion.
435 if (StrIdx - LastConversionIdx == 1)
436 CurrentState = state_OrdChr;
437 else {
438 // Issue a warning: invalid format conversion.
439 SourceLocation Loc =
440 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
441 LastConversionIdx+1);
442
443 Diag(Loc, diag::warn_printf_invalid_conversion,
Ted Kremenek035d8792007-10-12 20:51:52 +0000444 std::string(Str+LastConversionIdx, Str+StrIdx),
Ted Kremenek081ed872007-08-14 17:39:48 +0000445 Fn->getSourceRange());
446
447 // This conversion is broken. Advance to the next format
448 // conversion.
449 LastConversionIdx = StrIdx;
450 ++numConversions;
451 }
452
453 break;
454
455 default:
456 // This case catches all other characters: flags, widths, etc.
457 // We should eventually process those as well.
458 break;
459 }
460 }
461
462 if (CurrentState == state_Conversion) {
463 // Issue a warning: invalid format conversion.
464 SourceLocation Loc =
465 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
466 LastConversionIdx+1);
467
468 Diag(Loc, diag::warn_printf_invalid_conversion,
Chris Lattner6f65d202007-08-26 17:38:22 +0000469 std::string(Str+LastConversionIdx,
470 Str+std::min(LastConversionIdx+2, StrLen)),
Ted Kremenek081ed872007-08-14 17:39:48 +0000471 Fn->getSourceRange());
472 return;
473 }
474
475 if (!HasVAListArg) {
476 // CHECK: Does the number of format conversions exceed the number
477 // of data arguments?
478 if (numConversions > numDataArgs) {
479 SourceLocation Loc =
480 PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),
481 LastConversionIdx);
482
483 Diag(Loc, diag::warn_printf_insufficient_data_args,
484 Fn->getSourceRange());
485 }
486 // CHECK: Does the number of data arguments exceed the number of
487 // format conversions in the format string?
488 else if (numConversions < numDataArgs)
489 Diag(Args[format_idx+numConversions+1]->getLocStart(),
490 diag::warn_printf_too_many_data_args, Fn->getSourceRange());
491 }
492}
Ted Kremenek45925ab2007-08-17 16:46:58 +0000493
494//===--- CHECK: Return Address of Stack Variable --------------------------===//
495
496static DeclRefExpr* EvalVal(Expr *E);
497static DeclRefExpr* EvalAddr(Expr* E);
498
499/// CheckReturnStackAddr - Check if a return statement returns the address
500/// of a stack variable.
501void
502Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
503 SourceLocation ReturnLoc) {
504
505 // Perform checking for returned stack addresses.
506 if (lhsType->isPointerType()) {
507 if (DeclRefExpr *DR = EvalAddr(RetValExp))
508 Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
509 DR->getDecl()->getIdentifier()->getName(),
510 RetValExp->getSourceRange());
511 }
512 // Perform checking for stack values returned by reference.
513 else if (lhsType->isReferenceType()) {
Ted Kremenek1456f202007-08-27 16:39:17 +0000514 // Check for an implicit cast to a reference.
515 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
516 if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
517 Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
518 DR->getDecl()->getIdentifier()->getName(),
519 RetValExp->getSourceRange());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000520 }
521}
522
523/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
524/// check if the expression in a return statement evaluates to an address
525/// to a location on the stack. The recursion is used to traverse the
526/// AST of the return expression, with recursion backtracking when we
527/// encounter a subexpression that (1) clearly does not lead to the address
528/// of a stack variable or (2) is something we cannot determine leads to
529/// the address of a stack variable based on such local checking.
530///
Ted Kremenekda1300a2007-08-28 17:02:55 +0000531/// EvalAddr processes expressions that are pointers that are used as
532/// references (and not L-values). EvalVal handles all other values.
Ted Kremenek45925ab2007-08-17 16:46:58 +0000533/// At the base case of the recursion is a check for a DeclRefExpr* in
534/// the refers to a stack variable.
535///
536/// This implementation handles:
537///
538/// * pointer-to-pointer casts
539/// * implicit conversions from array references to pointers
540/// * taking the address of fields
541/// * arbitrary interplay between "&" and "*" operators
542/// * pointer arithmetic from an address of a stack variable
543/// * taking the address of an array element where the array is on the stack
544static DeclRefExpr* EvalAddr(Expr *E) {
545
546 // We should only be called for evaluating pointer expressions.
547 assert (E->getType()->isPointerType() && "EvalAddr only works on pointers");
548
549 // Our "symbolic interpreter" is just a dispatch off the currently
550 // viewed AST node. We then recursively traverse the AST by calling
551 // EvalAddr and EvalVal appropriately.
552 switch (E->getStmtClass()) {
553
554 case Stmt::ParenExprClass:
555 // Ignore parentheses.
556 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
557
558 case Stmt::UnaryOperatorClass: {
559 // The only unary operator that make sense to handle here
560 // is AddrOf. All others don't make sense as pointers.
561 UnaryOperator *U = cast<UnaryOperator>(E);
562
563 if (U->getOpcode() == UnaryOperator::AddrOf)
564 return EvalVal(U->getSubExpr());
565 else
566 return NULL;
567 }
568
569 case Stmt::BinaryOperatorClass: {
570 // Handle pointer arithmetic. All other binary operators are not valid
571 // in this context.
572 BinaryOperator *B = cast<BinaryOperator>(E);
573 BinaryOperator::Opcode op = B->getOpcode();
574
575 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
576 return NULL;
577
578 Expr *Base = B->getLHS();
579
580 // Determine which argument is the real pointer base. It could be
581 // the RHS argument instead of the LHS.
582 if (!Base->getType()->isPointerType()) Base = B->getRHS();
583
584 assert (Base->getType()->isPointerType());
585 return EvalAddr(Base);
586 }
587
588 // For conditional operators we need to see if either the LHS or RHS are
589 // valid DeclRefExpr*s. If one of them is valid, we return it.
590 case Stmt::ConditionalOperatorClass: {
591 ConditionalOperator *C = cast<ConditionalOperator>(E);
592
Anders Carlsson37365fc2007-11-30 19:04:31 +0000593 // Handle the GNU extension for missing LHS.
594 if (Expr *lhsExpr = C->getLHS())
595 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
596 return LHS;
597
598 return EvalAddr(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000599 }
600
601 // For implicit casts, we need to handle conversions from arrays to
602 // pointer values, and implicit pointer-to-pointer conversions.
603 case Stmt::ImplicitCastExprClass: {
604 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
605 Expr* SubExpr = IE->getSubExpr();
606
607 if (SubExpr->getType()->isPointerType())
608 return EvalAddr(SubExpr);
609 else
610 return EvalVal(SubExpr);
611 }
612
613 // For casts, we handle pointer-to-pointer conversions (which
614 // is essentially a no-op from our mini-interpreter's standpoint).
615 // For other casts we abort.
616 case Stmt::CastExprClass: {
617 CastExpr *C = cast<CastExpr>(E);
618 Expr *SubExpr = C->getSubExpr();
619
620 if (SubExpr->getType()->isPointerType())
621 return EvalAddr(SubExpr);
622 else
623 return NULL;
624 }
625
Ted Kremenek1c1700f2007-08-20 16:18:38 +0000626 // C++ casts. For dynamic casts, static casts, and const casts, we
627 // are always converting from a pointer-to-pointer, so we just blow
628 // through the cast. In the case the dynamic cast doesn't fail
629 // (and return NULL), we take the conservative route and report cases
630 // where we return the address of a stack variable. For Reinterpre
631 case Stmt::CXXCastExprClass: {
632 CXXCastExpr *C = cast<CXXCastExpr>(E);
633
634 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
635 Expr *S = C->getSubExpr();
636 if (S->getType()->isPointerType())
637 return EvalAddr(S);
638 else
639 return NULL;
640 }
641 else
642 return EvalAddr(C->getSubExpr());
643 }
Ted Kremenek45925ab2007-08-17 16:46:58 +0000644
645 // Everything else: we simply don't reason about them.
646 default:
647 return NULL;
648 }
649}
650
651
652/// EvalVal - This function is complements EvalAddr in the mutual recursion.
653/// See the comments for EvalAddr for more details.
654static DeclRefExpr* EvalVal(Expr *E) {
655
Ted Kremenekda1300a2007-08-28 17:02:55 +0000656 // We should only be called for evaluating non-pointer expressions, or
657 // expressions with a pointer type that are not used as references but instead
658 // are l-values (e.g., DeclRefExpr with a pointer type).
659
Ted Kremenek45925ab2007-08-17 16:46:58 +0000660 // Our "symbolic interpreter" is just a dispatch off the currently
661 // viewed AST node. We then recursively traverse the AST by calling
662 // EvalAddr and EvalVal appropriately.
663 switch (E->getStmtClass()) {
664
665 case Stmt::DeclRefExprClass: {
666 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
667 // at code that refers to a variable's name. We check if it has local
668 // storage within the function, and if so, return the expression.
669 DeclRefExpr *DR = cast<DeclRefExpr>(E);
670
671 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
672 if(V->hasLocalStorage()) return DR;
673
674 return NULL;
675 }
676
677 case Stmt::ParenExprClass:
678 // Ignore parentheses.
679 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
680
681 case Stmt::UnaryOperatorClass: {
682 // The only unary operator that make sense to handle here
683 // is Deref. All others don't resolve to a "name." This includes
684 // handling all sorts of rvalues passed to a unary operator.
685 UnaryOperator *U = cast<UnaryOperator>(E);
686
687 if (U->getOpcode() == UnaryOperator::Deref)
688 return EvalAddr(U->getSubExpr());
689
690 return NULL;
691 }
692
693 case Stmt::ArraySubscriptExprClass: {
694 // Array subscripts are potential references to data on the stack. We
695 // retrieve the DeclRefExpr* for the array variable if it indeed
696 // has local storage.
Ted Kremenek1c1700f2007-08-20 16:18:38 +0000697 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000698 }
699
700 case Stmt::ConditionalOperatorClass: {
701 // For conditional operators we need to see if either the LHS or RHS are
702 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
703 ConditionalOperator *C = cast<ConditionalOperator>(E);
704
Anders Carlsson37365fc2007-11-30 19:04:31 +0000705 // Handle the GNU extension for missing LHS.
706 if (Expr *lhsExpr = C->getLHS())
707 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
708 return LHS;
709
710 return EvalVal(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000711 }
712
713 // Accesses to members are potential references to data on the stack.
714 case Stmt::MemberExprClass: {
715 MemberExpr *M = cast<MemberExpr>(E);
716
717 // Check for indirect access. We only want direct field accesses.
718 if (!M->isArrow())
719 return EvalVal(M->getBase());
720 else
721 return NULL;
722 }
723
724 // Everything else: we simply don't reason about them.
725 default:
726 return NULL;
727 }
728}
Ted Kremenek30c66752007-11-25 00:58:00 +0000729
730//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
731
732/// Check for comparisons of floating point operands using != and ==.
733/// Issue a warning if these are no self-comparisons, as they are not likely
734/// to do what the programmer intended.
735void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
736 bool EmitWarning = true;
737
738 Expr* LeftExprSansParen = IgnoreParen(lex);
739 Expr* RightExprSansParen = IgnoreParen(rex);
740
741 // Special case: check for x == x (which is OK).
742 // Do not emit warnings for such cases.
743 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
744 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
745 if (DRL->getDecl() == DRR->getDecl())
746 EmitWarning = false;
747
Ted Kremenek33159832007-11-29 00:59:04 +0000748
749 // Special case: check for comparisons against literals that can be exactly
750 // represented by APFloat. In such cases, do not emit a warning. This
751 // is a heuristic: often comparison against such literals are used to
752 // detect if a value in a variable has not changed. This clearly can
753 // lead to false negatives.
754 if (EmitWarning) {
755 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
756 if (FLL->isExact())
757 EmitWarning = false;
758 }
759 else
760 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
761 if (FLR->isExact())
762 EmitWarning = false;
763 }
764 }
765
Ted Kremenek30c66752007-11-25 00:58:00 +0000766 // Check for comparisons with builtin types.
767 if (EmitWarning)
768 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
769 if (isCallBuiltin(CL))
770 EmitWarning = false;
771
772 if (EmitWarning)
773 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
774 if (isCallBuiltin(CR))
775 EmitWarning = false;
776
777 // Emit the diagnostic.
778 if (EmitWarning)
779 Diag(loc, diag::warn_floatingpoint_eq,
780 lex->getSourceRange(),rex->getSourceRange());
781}