blob: 76363c9dc0a358b90c67686ab20a4ba9e36b38fb [file] [log] [blame]
Chris Lattner59907c42007-08-10 20:18:51 +00001//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner59907c42007-08-10 20:18:51 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements extra semantic analysis beyond what is enforced
11// by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/Decl.h"
18#include "clang/AST/Expr.h"
Ted Kremenek23245122007-08-20 16:18:38 +000019#include "clang/AST/ExprCXX.h"
Chris Lattner59907c42007-08-10 20:18:51 +000020#include "clang/Lex/Preprocessor.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/Diagnostic.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/TargetInfo.h"
26#include "llvm/ADT/SmallString.h"
27#include "llvm/ADT/StringExtras.h"
Ted Kremenek588e5eb2007-11-25 00:58:00 +000028#include "SemaUtil.h"
Chris Lattner59907c42007-08-10 20:18:51 +000029using namespace clang;
30
31/// CheckFunctionCall - Check a direct function call for various correctness
32/// and safety properties not strictly enforced by the C type system.
Anders Carlsson71993dd2007-08-17 05:31:46 +000033bool
Chris Lattner925e60d2007-12-28 05:29:59 +000034Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
Chris Lattner59907c42007-08-10 20:18:51 +000035
36 // Get the IdentifierInfo* for the called function.
37 IdentifierInfo *FnInfo = FDecl->getIdentifier();
38
Chris Lattner30ce3442007-12-19 23:59:04 +000039 switch (FnInfo->getBuiltinID()) {
40 case Builtin::BI__builtin___CFStringMakeConstantString:
Chris Lattner925e60d2007-12-28 05:29:59 +000041 assert(TheCall->getNumArgs() == 1 &&
Chris Lattner1b9a0792007-12-20 00:26:33 +000042 "Wrong # arguments to builtin CFStringMakeConstantString");
Chris Lattner925e60d2007-12-28 05:29:59 +000043 return CheckBuiltinCFStringArgument(TheCall->getArg(0));
Chris Lattner30ce3442007-12-19 23:59:04 +000044 case Builtin::BI__builtin_va_start:
Chris Lattner925e60d2007-12-28 05:29:59 +000045 return SemaBuiltinVAStart(TheCall);
Chris Lattner1b9a0792007-12-20 00:26:33 +000046
47 case Builtin::BI__builtin_isgreater:
48 case Builtin::BI__builtin_isgreaterequal:
49 case Builtin::BI__builtin_isless:
50 case Builtin::BI__builtin_islessequal:
51 case Builtin::BI__builtin_islessgreater:
52 case Builtin::BI__builtin_isunordered:
Chris Lattner925e60d2007-12-28 05:29:59 +000053 return SemaBuiltinUnorderedCompare(TheCall);
Anders Carlsson71993dd2007-08-17 05:31:46 +000054 }
55
Chris Lattner59907c42007-08-10 20:18:51 +000056 // Search the KnownFunctionIDs for the identifier.
57 unsigned i = 0, e = id_num_known_functions;
Ted Kremenek71895b92007-08-14 17:39:48 +000058 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
Anders Carlsson9cdc4d32007-08-17 15:44:17 +000059 if (i == e) return false;
Chris Lattner59907c42007-08-10 20:18:51 +000060
61 // Printf checking.
62 if (i <= id_vprintf) {
Ted Kremenek71895b92007-08-14 17:39:48 +000063 // Retrieve the index of the format string parameter and determine
64 // if the function is passed a va_arg argument.
Chris Lattner59907c42007-08-10 20:18:51 +000065 unsigned format_idx = 0;
Ted Kremenek71895b92007-08-14 17:39:48 +000066 bool HasVAListArg = false;
67
Chris Lattner59907c42007-08-10 20:18:51 +000068 switch (i) {
Chris Lattner30ce3442007-12-19 23:59:04 +000069 default: assert(false && "No format string argument index.");
70 case id_printf: format_idx = 0; break;
71 case id_fprintf: format_idx = 1; break;
72 case id_sprintf: format_idx = 1; break;
73 case id_snprintf: format_idx = 2; break;
74 case id_asprintf: format_idx = 1; break;
75 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
76 case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
77 case id_vfprintf: format_idx = 1; HasVAListArg = true; break;
78 case id_vsprintf: format_idx = 1; HasVAListArg = true; break;
79 case id_vprintf: format_idx = 0; HasVAListArg = true; break;
Ted Kremenek71895b92007-08-14 17:39:48 +000080 }
81
Chris Lattner925e60d2007-12-28 05:29:59 +000082 CheckPrintfArguments(TheCall, HasVAListArg, format_idx);
Chris Lattner59907c42007-08-10 20:18:51 +000083 }
Anders Carlsson71993dd2007-08-17 05:31:46 +000084
Anders Carlsson9cdc4d32007-08-17 15:44:17 +000085 return false;
Anders Carlsson71993dd2007-08-17 05:31:46 +000086}
87
88/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
89/// CFString constructor is correct
Chris Lattnercc6f65d2007-08-25 05:30:33 +000090bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
Chris Lattner998568f2007-12-28 05:38:24 +000091 Arg = IgnoreParenCasts(Arg);
Anders Carlsson71993dd2007-08-17 05:31:46 +000092
93 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
94
95 if (!Literal || Literal->isWide()) {
96 Diag(Arg->getLocStart(),
97 diag::err_cfstring_literal_not_string_constant,
98 Arg->getSourceRange());
Anders Carlsson9cdc4d32007-08-17 15:44:17 +000099 return true;
Anders Carlsson71993dd2007-08-17 05:31:46 +0000100 }
101
102 const char *Data = Literal->getStrData();
103 unsigned Length = Literal->getByteLength();
104
105 for (unsigned i = 0; i < Length; ++i) {
106 if (!isascii(Data[i])) {
107 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
108 diag::warn_cfstring_literal_contains_non_ascii_character,
109 Arg->getSourceRange());
110 break;
111 }
112
113 if (!Data[i]) {
114 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
115 diag::warn_cfstring_literal_contains_nul_character,
116 Arg->getSourceRange());
117 break;
118 }
119 }
120
Anders Carlsson9cdc4d32007-08-17 15:44:17 +0000121 return false;
Chris Lattner59907c42007-08-10 20:18:51 +0000122}
123
Chris Lattnerc27c6652007-12-20 00:05:45 +0000124/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
125/// Emit an error and return true on failure, return false on success.
Chris Lattner925e60d2007-12-28 05:29:59 +0000126bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
127 Expr *Fn = TheCall->getCallee();
128 if (TheCall->getNumArgs() > 2) {
129 Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattner30ce3442007-12-19 23:59:04 +0000130 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
Chris Lattner925e60d2007-12-28 05:29:59 +0000131 SourceRange(TheCall->getArg(2)->getLocStart(),
132 (*(TheCall->arg_end()-1))->getLocEnd()));
Chris Lattner30ce3442007-12-19 23:59:04 +0000133 return true;
134 }
135
Chris Lattnerc27c6652007-12-20 00:05:45 +0000136 // Determine whether the current function is variadic or not.
137 bool isVariadic;
Chris Lattner30ce3442007-12-19 23:59:04 +0000138 if (CurFunctionDecl)
Chris Lattnerc27c6652007-12-20 00:05:45 +0000139 isVariadic =
140 cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic();
Chris Lattner30ce3442007-12-19 23:59:04 +0000141 else
Chris Lattnerc27c6652007-12-20 00:05:45 +0000142 isVariadic = CurMethodDecl->isVariadic();
Chris Lattner30ce3442007-12-19 23:59:04 +0000143
Chris Lattnerc27c6652007-12-20 00:05:45 +0000144 if (!isVariadic) {
Chris Lattner30ce3442007-12-19 23:59:04 +0000145 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
146 return true;
147 }
148
149 // Verify that the second argument to the builtin is the last argument of the
150 // current function or method.
151 bool SecondArgIsLastNamedArgument = false;
Anders Carlsson88cf2262008-02-11 04:20:54 +0000152 const Expr *Arg = TheCall->getArg(1);
153 while (1) {
154 if (const ParenExpr *PE = dyn_cast<ParenExpr>(Arg))
155 Arg = PE->getSubExpr();
156 else if (const ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(Arg))
157 Arg = CE->getSubExpr();
158 else
159 break;
160 }
161
162 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
163 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Chris Lattner30ce3442007-12-19 23:59:04 +0000164 // FIXME: This isn't correct for methods (results in bogus warning).
165 // Get the last formal in the current function.
Anders Carlsson88cf2262008-02-11 04:20:54 +0000166 const ParmVarDecl *LastArg;
Chris Lattner30ce3442007-12-19 23:59:04 +0000167 if (CurFunctionDecl)
168 LastArg = *(CurFunctionDecl->param_end()-1);
169 else
170 LastArg = *(CurMethodDecl->param_end()-1);
171 SecondArgIsLastNamedArgument = PV == LastArg;
172 }
173 }
174
175 if (!SecondArgIsLastNamedArgument)
Chris Lattner925e60d2007-12-28 05:29:59 +0000176 Diag(TheCall->getArg(1)->getLocStart(),
Chris Lattner30ce3442007-12-19 23:59:04 +0000177 diag::warn_second_parameter_of_va_start_not_last_named_argument);
178 return false;
179}
180
Chris Lattner1b9a0792007-12-20 00:26:33 +0000181/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
182/// friends. This is declared to take (...), so we have to check everything.
Chris Lattner925e60d2007-12-28 05:29:59 +0000183bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
184 if (TheCall->getNumArgs() < 2)
185 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args);
186 if (TheCall->getNumArgs() > 2)
187 return Diag(TheCall->getArg(2)->getLocStart(),
188 diag::err_typecheck_call_too_many_args,
189 SourceRange(TheCall->getArg(2)->getLocStart(),
190 (*(TheCall->arg_end()-1))->getLocEnd()));
Chris Lattner1b9a0792007-12-20 00:26:33 +0000191
Chris Lattner925e60d2007-12-28 05:29:59 +0000192 Expr *OrigArg0 = TheCall->getArg(0);
193 Expr *OrigArg1 = TheCall->getArg(1);
Chris Lattner1b9a0792007-12-20 00:26:33 +0000194
195 // Do standard promotions between the two arguments, returning their common
196 // type.
Chris Lattner925e60d2007-12-28 05:29:59 +0000197 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
Chris Lattner1b9a0792007-12-20 00:26:33 +0000198
199 // If the common type isn't a real floating type, then the arguments were
200 // invalid for this operation.
201 if (!Res->isRealFloatingType())
Chris Lattner925e60d2007-12-28 05:29:59 +0000202 return Diag(OrigArg0->getLocStart(),
Chris Lattner1b9a0792007-12-20 00:26:33 +0000203 diag::err_typecheck_call_invalid_ordered_compare,
204 OrigArg0->getType().getAsString(),
205 OrigArg1->getType().getAsString(),
Chris Lattner925e60d2007-12-28 05:29:59 +0000206 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()));
Chris Lattner1b9a0792007-12-20 00:26:33 +0000207
208 return false;
209}
210
Chris Lattner30ce3442007-12-19 23:59:04 +0000211
Chris Lattner59907c42007-08-10 20:18:51 +0000212/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek71895b92007-08-14 17:39:48 +0000213/// correct use of format strings.
214///
215/// HasVAListArg - A predicate indicating whether the printf-like
216/// function is passed an explicit va_arg argument (e.g., vprintf)
217///
218/// format_idx - The index into Args for the format string.
219///
220/// Improper format strings to functions in the printf family can be
221/// the source of bizarre bugs and very serious security holes. A
222/// good source of information is available in the following paper
223/// (which includes additional references):
Chris Lattner59907c42007-08-10 20:18:51 +0000224///
225/// FormatGuard: Automatic Protection From printf Format String
226/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek71895b92007-08-14 17:39:48 +0000227///
228/// Functionality implemented:
229///
230/// We can statically check the following properties for string
231/// literal format strings for non v.*printf functions (where the
232/// arguments are passed directly):
233//
234/// (1) Are the number of format conversions equal to the number of
235/// data arguments?
236///
237/// (2) Does each format conversion correctly match the type of the
238/// corresponding data argument? (TODO)
239///
240/// Moreover, for all printf functions we can:
241///
242/// (3) Check for a missing format string (when not caught by type checking).
243///
244/// (4) Check for no-operation flags; e.g. using "#" with format
245/// conversion 'c' (TODO)
246///
247/// (5) Check the use of '%n', a major source of security holes.
248///
249/// (6) Check for malformed format conversions that don't specify anything.
250///
251/// (7) Check for empty format strings. e.g: printf("");
252///
253/// (8) Check that the format string is a wide literal.
254///
255/// All of these checks can be done by parsing the format string.
256///
257/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner59907c42007-08-10 20:18:51 +0000258void
Chris Lattner925e60d2007-12-28 05:29:59 +0000259Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
260 unsigned format_idx) {
261 Expr *Fn = TheCall->getCallee();
262
Ted Kremenek71895b92007-08-14 17:39:48 +0000263 // CHECK: printf-like function is called with no format string.
Chris Lattner925e60d2007-12-28 05:29:59 +0000264 if (format_idx >= TheCall->getNumArgs()) {
265 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string,
Ted Kremenek71895b92007-08-14 17:39:48 +0000266 Fn->getSourceRange());
267 return;
268 }
269
Chris Lattner998568f2007-12-28 05:38:24 +0000270 Expr *OrigFormatExpr = IgnoreParenCasts(TheCall->getArg(format_idx));
Chris Lattner459e8482007-08-25 05:36:18 +0000271
Chris Lattner59907c42007-08-10 20:18:51 +0000272 // CHECK: format string is not a string literal.
273 //
Ted Kremenek71895b92007-08-14 17:39:48 +0000274 // Dynamically generated format strings are difficult to
275 // automatically vet at compile time. Requiring that format strings
276 // are string literals: (1) permits the checking of format strings by
277 // the compiler and thereby (2) can practically remove the source of
278 // many format string exploits.
Chris Lattner459e8482007-08-25 05:36:18 +0000279 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
Ted Kremenek71895b92007-08-14 17:39:48 +0000280 if (FExpr == NULL) {
Ted Kremenek4a336462007-12-17 19:03:13 +0000281 // For vprintf* functions (i.e., HasVAListArg==true), we add a
282 // special check to see if the format string is a function parameter
283 // of the function calling the printf function. If the function
284 // has an attribute indicating it is a printf-like function, then we
285 // should suppress warnings concerning non-literals being used in a call
286 // to a vprintf function. For example:
287 //
288 // void
289 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
290 // va_list ap;
291 // va_start(ap, fmt);
292 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
293 // ...
294 //
295 //
296 // FIXME: We don't have full attribute support yet, so just check to see
297 // if the argument is a DeclRefExpr that references a parameter. We'll
298 // add proper support for checking the attribute later.
299 if (HasVAListArg)
Chris Lattner998568f2007-12-28 05:38:24 +0000300 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
301 if (isa<ParmVarDecl>(DR->getDecl()))
Ted Kremenek4a336462007-12-17 19:03:13 +0000302 return;
303
Chris Lattner925e60d2007-12-28 05:29:59 +0000304 Diag(TheCall->getArg(format_idx)->getLocStart(),
305 diag::warn_printf_not_string_constant, Fn->getSourceRange());
Ted Kremenek71895b92007-08-14 17:39:48 +0000306 return;
307 }
308
309 // CHECK: is the format string a wide literal?
310 if (FExpr->isWide()) {
Chris Lattner925e60d2007-12-28 05:29:59 +0000311 Diag(FExpr->getLocStart(),
312 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange());
Ted Kremenek71895b92007-08-14 17:39:48 +0000313 return;
314 }
315
316 // Str - The format string. NOTE: this is NOT null-terminated!
317 const char * const Str = FExpr->getStrData();
318
319 // CHECK: empty format string?
320 const unsigned StrLen = FExpr->getByteLength();
321
322 if (StrLen == 0) {
Chris Lattner925e60d2007-12-28 05:29:59 +0000323 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string,
324 Fn->getSourceRange());
Ted Kremenek71895b92007-08-14 17:39:48 +0000325 return;
326 }
327
328 // We process the format string using a binary state machine. The
329 // current state is stored in CurrentState.
330 enum {
331 state_OrdChr,
332 state_Conversion
333 } CurrentState = state_OrdChr;
334
335 // numConversions - The number of conversions seen so far. This is
336 // incremented as we traverse the format string.
337 unsigned numConversions = 0;
338
339 // numDataArgs - The number of data arguments after the format
340 // string. This can only be determined for non vprintf-like
341 // functions. For those functions, this value is 1 (the sole
342 // va_arg argument).
Chris Lattner925e60d2007-12-28 05:29:59 +0000343 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
Ted Kremenek71895b92007-08-14 17:39:48 +0000344
345 // Inspect the format string.
346 unsigned StrIdx = 0;
347
348 // LastConversionIdx - Index within the format string where we last saw
349 // a '%' character that starts a new format conversion.
350 unsigned LastConversionIdx = 0;
351
Chris Lattner925e60d2007-12-28 05:29:59 +0000352 for (; StrIdx < StrLen; ++StrIdx) {
Chris Lattner998568f2007-12-28 05:38:24 +0000353
Ted Kremenek71895b92007-08-14 17:39:48 +0000354 // Is the number of detected conversion conversions greater than
355 // the number of matching data arguments? If so, stop.
356 if (!HasVAListArg && numConversions > numDataArgs) break;
357
358 // Handle "\0"
Chris Lattner925e60d2007-12-28 05:29:59 +0000359 if (Str[StrIdx] == '\0') {
Ted Kremenek71895b92007-08-14 17:39:48 +0000360 // The string returned by getStrData() is not null-terminated,
361 // so the presence of a null character is likely an error.
Chris Lattner998568f2007-12-28 05:38:24 +0000362 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
363 diag::warn_printf_format_string_contains_null_char,
Ted Kremenek71895b92007-08-14 17:39:48 +0000364 Fn->getSourceRange());
Ted Kremenek71895b92007-08-14 17:39:48 +0000365 return;
366 }
367
368 // Ordinary characters (not processing a format conversion).
369 if (CurrentState == state_OrdChr) {
370 if (Str[StrIdx] == '%') {
371 CurrentState = state_Conversion;
372 LastConversionIdx = StrIdx;
373 }
374 continue;
375 }
376
377 // Seen '%'. Now processing a format conversion.
378 switch (Str[StrIdx]) {
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000379 // Handle dynamic precision or width specifier.
380 case '*': {
381 ++numConversions;
382
383 if (!HasVAListArg && numConversions > numDataArgs) {
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000384 SourceLocation Loc = FExpr->getLocStart();
385 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
Ted Kremenek580b6642007-10-12 20:51:52 +0000386
Ted Kremenek580b6642007-10-12 20:51:52 +0000387 if (Str[StrIdx-1] == '.')
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000388 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
389 Fn->getSourceRange());
Ted Kremenek580b6642007-10-12 20:51:52 +0000390 else
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000391 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
392 Fn->getSourceRange());
Ted Kremenek580b6642007-10-12 20:51:52 +0000393
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000394 // Don't do any more checking. We'll just emit spurious errors.
395 return;
Ted Kremenek580b6642007-10-12 20:51:52 +0000396 }
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000397
398 // Perform type checking on width/precision specifier.
399 Expr *E = TheCall->getArg(format_idx+numConversions);
400 if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
401 if (BT->getKind() == BuiltinType::Int)
402 break;
Ted Kremenek71895b92007-08-14 17:39:48 +0000403
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000404 SourceLocation Loc =
405 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
406
407 if (Str[StrIdx-1] == '.')
408 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
409 E->getType().getAsString(), E->getSourceRange());
410 else
411 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
412 E->getType().getAsString(), E->getSourceRange());
413
414 break;
415 }
416
417 // Characters which can terminate a format conversion
418 // (e.g. "%d"). Characters that specify length modifiers or
419 // other flags are handled by the default case below.
420 //
421 // FIXME: additional checks will go into the following cases.
422 case 'i':
423 case 'd':
424 case 'o':
425 case 'u':
426 case 'x':
427 case 'X':
428 case 'D':
429 case 'O':
430 case 'U':
431 case 'e':
432 case 'E':
433 case 'f':
434 case 'F':
435 case 'g':
436 case 'G':
437 case 'a':
438 case 'A':
439 case 'c':
440 case 'C':
441 case 'S':
442 case 's':
443 case 'p':
444 ++numConversions;
445 CurrentState = state_OrdChr;
446 break;
447
448 // CHECK: Are we using "%n"? Issue a warning.
449 case 'n': {
450 ++numConversions;
451 CurrentState = state_OrdChr;
452 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
453 LastConversionIdx+1);
454
455 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
456 break;
457 }
458
459 // Handle "%%"
460 case '%':
461 // Sanity check: Was the first "%" character the previous one?
462 // If not, we will assume that we have a malformed format
463 // conversion, and that the current "%" character is the start
464 // of a new conversion.
465 if (StrIdx - LastConversionIdx == 1)
466 CurrentState = state_OrdChr;
467 else {
468 // Issue a warning: invalid format conversion.
Chris Lattner925e60d2007-12-28 05:29:59 +0000469 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
470 LastConversionIdx+1);
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000471
472 Diag(Loc, diag::warn_printf_invalid_conversion,
473 std::string(Str+LastConversionIdx, Str+StrIdx),
474 Fn->getSourceRange());
475
476 // This conversion is broken. Advance to the next format
477 // conversion.
478 LastConversionIdx = StrIdx;
479 ++numConversions;
Ted Kremenek71895b92007-08-14 17:39:48 +0000480 }
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000481 break;
Ted Kremenek71895b92007-08-14 17:39:48 +0000482
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000483 default:
484 // This case catches all other characters: flags, widths, etc.
485 // We should eventually process those as well.
486 break;
Ted Kremenek71895b92007-08-14 17:39:48 +0000487 }
488 }
489
490 if (CurrentState == state_Conversion) {
491 // Issue a warning: invalid format conversion.
Chris Lattner925e60d2007-12-28 05:29:59 +0000492 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
493 LastConversionIdx+1);
Ted Kremenek71895b92007-08-14 17:39:48 +0000494
495 Diag(Loc, diag::warn_printf_invalid_conversion,
Chris Lattnera9e2ea12007-08-26 17:38:22 +0000496 std::string(Str+LastConversionIdx,
497 Str+std::min(LastConversionIdx+2, StrLen)),
Ted Kremenek71895b92007-08-14 17:39:48 +0000498 Fn->getSourceRange());
499 return;
500 }
501
502 if (!HasVAListArg) {
503 // CHECK: Does the number of format conversions exceed the number
504 // of data arguments?
505 if (numConversions > numDataArgs) {
Chris Lattner925e60d2007-12-28 05:29:59 +0000506 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
507 LastConversionIdx);
Ted Kremenek71895b92007-08-14 17:39:48 +0000508
509 Diag(Loc, diag::warn_printf_insufficient_data_args,
510 Fn->getSourceRange());
511 }
512 // CHECK: Does the number of data arguments exceed the number of
513 // format conversions in the format string?
514 else if (numConversions < numDataArgs)
Chris Lattner925e60d2007-12-28 05:29:59 +0000515 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
Ted Kremenek71895b92007-08-14 17:39:48 +0000516 diag::warn_printf_too_many_data_args, Fn->getSourceRange());
517 }
518}
Ted Kremenek06de2762007-08-17 16:46:58 +0000519
520//===--- CHECK: Return Address of Stack Variable --------------------------===//
521
522static DeclRefExpr* EvalVal(Expr *E);
523static DeclRefExpr* EvalAddr(Expr* E);
524
525/// CheckReturnStackAddr - Check if a return statement returns the address
526/// of a stack variable.
527void
528Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
529 SourceLocation ReturnLoc) {
530
531 // Perform checking for returned stack addresses.
532 if (lhsType->isPointerType()) {
533 if (DeclRefExpr *DR = EvalAddr(RetValExp))
534 Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
535 DR->getDecl()->getIdentifier()->getName(),
536 RetValExp->getSourceRange());
537 }
538 // Perform checking for stack values returned by reference.
539 else if (lhsType->isReferenceType()) {
Ted Kremenek96eabe02007-08-27 16:39:17 +0000540 // Check for an implicit cast to a reference.
541 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
542 if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
543 Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
544 DR->getDecl()->getIdentifier()->getName(),
545 RetValExp->getSourceRange());
Ted Kremenek06de2762007-08-17 16:46:58 +0000546 }
547}
548
549/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
550/// check if the expression in a return statement evaluates to an address
551/// to a location on the stack. The recursion is used to traverse the
552/// AST of the return expression, with recursion backtracking when we
553/// encounter a subexpression that (1) clearly does not lead to the address
554/// of a stack variable or (2) is something we cannot determine leads to
555/// the address of a stack variable based on such local checking.
556///
Ted Kremeneke8c600f2007-08-28 17:02:55 +0000557/// EvalAddr processes expressions that are pointers that are used as
558/// references (and not L-values). EvalVal handles all other values.
Ted Kremenek06de2762007-08-17 16:46:58 +0000559/// At the base case of the recursion is a check for a DeclRefExpr* in
560/// the refers to a stack variable.
561///
562/// This implementation handles:
563///
564/// * pointer-to-pointer casts
565/// * implicit conversions from array references to pointers
566/// * taking the address of fields
567/// * arbitrary interplay between "&" and "*" operators
568/// * pointer arithmetic from an address of a stack variable
569/// * taking the address of an array element where the array is on the stack
570static DeclRefExpr* EvalAddr(Expr *E) {
Ted Kremenek06de2762007-08-17 16:46:58 +0000571 // We should only be called for evaluating pointer expressions.
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000572 assert((E->getType()->isPointerType() ||
Ted Kremeneka526c5c2008-01-07 19:49:32 +0000573 E->getType()->isObjCQualifiedIdType()) &&
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000574 "EvalAddr only works on pointers");
Ted Kremenek06de2762007-08-17 16:46:58 +0000575
576 // Our "symbolic interpreter" is just a dispatch off the currently
577 // viewed AST node. We then recursively traverse the AST by calling
578 // EvalAddr and EvalVal appropriately.
579 switch (E->getStmtClass()) {
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000580 case Stmt::ParenExprClass:
581 // Ignore parentheses.
582 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
Ted Kremenek06de2762007-08-17 16:46:58 +0000583
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000584 case Stmt::UnaryOperatorClass: {
585 // The only unary operator that make sense to handle here
586 // is AddrOf. All others don't make sense as pointers.
587 UnaryOperator *U = cast<UnaryOperator>(E);
Ted Kremenek06de2762007-08-17 16:46:58 +0000588
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000589 if (U->getOpcode() == UnaryOperator::AddrOf)
590 return EvalVal(U->getSubExpr());
591 else
Ted Kremenek06de2762007-08-17 16:46:58 +0000592 return NULL;
593 }
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000594
595 case Stmt::BinaryOperatorClass: {
596 // Handle pointer arithmetic. All other binary operators are not valid
597 // in this context.
598 BinaryOperator *B = cast<BinaryOperator>(E);
599 BinaryOperator::Opcode op = B->getOpcode();
600
601 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
602 return NULL;
603
604 Expr *Base = B->getLHS();
605
606 // Determine which argument is the real pointer base. It could be
607 // the RHS argument instead of the LHS.
608 if (!Base->getType()->isPointerType()) Base = B->getRHS();
609
610 assert (Base->getType()->isPointerType());
611 return EvalAddr(Base);
612 }
613
614 // For conditional operators we need to see if either the LHS or RHS are
615 // valid DeclRefExpr*s. If one of them is valid, we return it.
616 case Stmt::ConditionalOperatorClass: {
617 ConditionalOperator *C = cast<ConditionalOperator>(E);
618
619 // Handle the GNU extension for missing LHS.
620 if (Expr *lhsExpr = C->getLHS())
621 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
622 return LHS;
623
624 return EvalAddr(C->getRHS());
625 }
626
627 // For implicit casts, we need to handle conversions from arrays to
628 // pointer values, and implicit pointer-to-pointer conversions.
629 case Stmt::ImplicitCastExprClass: {
630 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
631 Expr* SubExpr = IE->getSubExpr();
632
633 if (SubExpr->getType()->isPointerType() ||
Ted Kremeneka526c5c2008-01-07 19:49:32 +0000634 SubExpr->getType()->isObjCQualifiedIdType())
Chris Lattnerfae3f1f2007-12-28 05:31:15 +0000635 return EvalAddr(SubExpr);
636 else
637 return EvalVal(SubExpr);
638 }
639
640 // For casts, we handle pointer-to-pointer conversions (which
641 // is essentially a no-op from our mini-interpreter's standpoint).
642 // For other casts we abort.
643 case Stmt::CastExprClass: {
644 CastExpr *C = cast<CastExpr>(E);
645 Expr *SubExpr = C->getSubExpr();
646
647 if (SubExpr->getType()->isPointerType())
648 return EvalAddr(SubExpr);
649 else
650 return NULL;
651 }
652
653 // C++ casts. For dynamic casts, static casts, and const casts, we
654 // are always converting from a pointer-to-pointer, so we just blow
655 // through the cast. In the case the dynamic cast doesn't fail
656 // (and return NULL), we take the conservative route and report cases
657 // where we return the address of a stack variable. For Reinterpre
658 case Stmt::CXXCastExprClass: {
659 CXXCastExpr *C = cast<CXXCastExpr>(E);
660
661 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
662 Expr *S = C->getSubExpr();
663 if (S->getType()->isPointerType())
664 return EvalAddr(S);
665 else
666 return NULL;
667 }
668 else
669 return EvalAddr(C->getSubExpr());
670 }
671
672 // Everything else: we simply don't reason about them.
673 default:
674 return NULL;
675 }
Ted Kremenek06de2762007-08-17 16:46:58 +0000676}
677
678
679/// EvalVal - This function is complements EvalAddr in the mutual recursion.
680/// See the comments for EvalAddr for more details.
681static DeclRefExpr* EvalVal(Expr *E) {
682
Ted Kremeneke8c600f2007-08-28 17:02:55 +0000683 // We should only be called for evaluating non-pointer expressions, or
684 // expressions with a pointer type that are not used as references but instead
685 // are l-values (e.g., DeclRefExpr with a pointer type).
686
Ted Kremenek06de2762007-08-17 16:46:58 +0000687 // Our "symbolic interpreter" is just a dispatch off the currently
688 // viewed AST node. We then recursively traverse the AST by calling
689 // EvalAddr and EvalVal appropriately.
690 switch (E->getStmtClass()) {
Ted Kremenek06de2762007-08-17 16:46:58 +0000691 case Stmt::DeclRefExprClass: {
692 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
693 // at code that refers to a variable's name. We check if it has local
694 // storage within the function, and if so, return the expression.
695 DeclRefExpr *DR = cast<DeclRefExpr>(E);
696
697 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
698 if(V->hasLocalStorage()) return DR;
699
700 return NULL;
701 }
702
703 case Stmt::ParenExprClass:
704 // Ignore parentheses.
705 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
706
707 case Stmt::UnaryOperatorClass: {
708 // The only unary operator that make sense to handle here
709 // is Deref. All others don't resolve to a "name." This includes
710 // handling all sorts of rvalues passed to a unary operator.
711 UnaryOperator *U = cast<UnaryOperator>(E);
712
713 if (U->getOpcode() == UnaryOperator::Deref)
714 return EvalAddr(U->getSubExpr());
715
716 return NULL;
717 }
718
719 case Stmt::ArraySubscriptExprClass: {
720 // Array subscripts are potential references to data on the stack. We
721 // retrieve the DeclRefExpr* for the array variable if it indeed
722 // has local storage.
Ted Kremenek23245122007-08-20 16:18:38 +0000723 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek06de2762007-08-17 16:46:58 +0000724 }
725
726 case Stmt::ConditionalOperatorClass: {
727 // For conditional operators we need to see if either the LHS or RHS are
728 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
729 ConditionalOperator *C = cast<ConditionalOperator>(E);
730
Anders Carlsson39073232007-11-30 19:04:31 +0000731 // Handle the GNU extension for missing LHS.
732 if (Expr *lhsExpr = C->getLHS())
733 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
734 return LHS;
735
736 return EvalVal(C->getRHS());
Ted Kremenek06de2762007-08-17 16:46:58 +0000737 }
738
739 // Accesses to members are potential references to data on the stack.
740 case Stmt::MemberExprClass: {
741 MemberExpr *M = cast<MemberExpr>(E);
742
743 // Check for indirect access. We only want direct field accesses.
744 if (!M->isArrow())
745 return EvalVal(M->getBase());
746 else
747 return NULL;
748 }
749
750 // Everything else: we simply don't reason about them.
751 default:
752 return NULL;
753 }
754}
Ted Kremenek588e5eb2007-11-25 00:58:00 +0000755
756//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
757
758/// Check for comparisons of floating point operands using != and ==.
759/// Issue a warning if these are no self-comparisons, as they are not likely
760/// to do what the programmer intended.
761void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
762 bool EmitWarning = true;
763
Ted Kremenek4e99a5f2008-01-17 16:57:34 +0000764 Expr* LeftExprSansParen = lex->IgnoreParens();
Ted Kremenek32e97b62008-01-17 17:55:13 +0000765 Expr* RightExprSansParen = rex->IgnoreParens();
Ted Kremenek588e5eb2007-11-25 00:58:00 +0000766
767 // Special case: check for x == x (which is OK).
768 // Do not emit warnings for such cases.
769 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
770 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
771 if (DRL->getDecl() == DRR->getDecl())
772 EmitWarning = false;
773
Ted Kremenek1b500bb2007-11-29 00:59:04 +0000774
775 // Special case: check for comparisons against literals that can be exactly
776 // represented by APFloat. In such cases, do not emit a warning. This
777 // is a heuristic: often comparison against such literals are used to
778 // detect if a value in a variable has not changed. This clearly can
779 // lead to false negatives.
780 if (EmitWarning) {
781 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
782 if (FLL->isExact())
783 EmitWarning = false;
784 }
785 else
786 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
787 if (FLR->isExact())
788 EmitWarning = false;
789 }
790 }
791
Ted Kremenek588e5eb2007-11-25 00:58:00 +0000792 // Check for comparisons with builtin types.
793 if (EmitWarning)
794 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
795 if (isCallBuiltin(CL))
796 EmitWarning = false;
797
798 if (EmitWarning)
799 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
800 if (isCallBuiltin(CR))
801 EmitWarning = false;
802
803 // Emit the diagnostic.
804 if (EmitWarning)
805 Diag(loc, diag::warn_floatingpoint_eq,
806 lex->getSourceRange(),rex->getSourceRange());
807}