blob: ce0036e399cd99229537f6c50b3bbf39e03cc29a [file] [log] [blame]
Chris Lattner2e64c072007-08-10 20:18:51 +00001//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner959e5be2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner2e64c072007-08-10 20:18:51 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements extra semantic analysis beyond what is enforced
11// by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/Decl.h"
18#include "clang/AST/Expr.h"
Ted Kremenek1c1700f2007-08-20 16:18:38 +000019#include "clang/AST/ExprCXX.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000020#include "clang/Lex/Preprocessor.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/Diagnostic.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/TargetInfo.h"
Eli Friedman798e4d52008-05-16 17:51:27 +000026#include "llvm/ADT/OwningPtr.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000027#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/StringExtras.h"
Ted Kremenek30c66752007-11-25 00:58:00 +000029#include "SemaUtil.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000030using namespace clang;
31
32/// CheckFunctionCall - Check a direct function call for various correctness
33/// and safety properties not strictly enforced by the C type system.
Eli Friedmand0e9d092008-05-14 19:38:39 +000034Action::ExprResult
Eli Friedman798e4d52008-05-16 17:51:27 +000035Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) {
36 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw);
Chris Lattner2e64c072007-08-10 20:18:51 +000037 // Get the IdentifierInfo* for the called function.
38 IdentifierInfo *FnInfo = FDecl->getIdentifier();
39
Chris Lattnerf22a8502007-12-19 23:59:04 +000040 switch (FnInfo->getBuiltinID()) {
41 case Builtin::BI__builtin___CFStringMakeConstantString:
Chris Lattner83bd5eb2007-12-28 05:29:59 +000042 assert(TheCall->getNumArgs() == 1 &&
Chris Lattner7c8d1af2007-12-20 00:26:33 +000043 "Wrong # arguments to builtin CFStringMakeConstantString");
Eli Friedman798e4d52008-05-16 17:51:27 +000044 if (CheckBuiltinCFStringArgument(TheCall->getArg(0)))
Eli Friedmand0e9d092008-05-14 19:38:39 +000045 return true;
Eli Friedman798e4d52008-05-16 17:51:27 +000046 return TheCall.take();
Chris Lattnerf22a8502007-12-19 23:59:04 +000047 case Builtin::BI__builtin_va_start:
Eli Friedman798e4d52008-05-16 17:51:27 +000048 if (SemaBuiltinVAStart(TheCall.get())) {
Eli Friedmand0e9d092008-05-14 19:38:39 +000049 return true;
50 }
Eli Friedman798e4d52008-05-16 17:51:27 +000051 return TheCall.take();
Chris Lattner7c8d1af2007-12-20 00:26:33 +000052 case Builtin::BI__builtin_isgreater:
53 case Builtin::BI__builtin_isgreaterequal:
54 case Builtin::BI__builtin_isless:
55 case Builtin::BI__builtin_islessequal:
56 case Builtin::BI__builtin_islessgreater:
57 case Builtin::BI__builtin_isunordered:
Eli Friedman798e4d52008-05-16 17:51:27 +000058 if (SemaBuiltinUnorderedCompare(TheCall.get()))
Eli Friedmand0e9d092008-05-14 19:38:39 +000059 return true;
Eli Friedman798e4d52008-05-16 17:51:27 +000060 return TheCall.take();
Eli Friedmand0e9d092008-05-14 19:38:39 +000061 case Builtin::BI__builtin_shufflevector:
Eli Friedman798e4d52008-05-16 17:51:27 +000062 return SemaBuiltinShuffleVector(TheCall.get());
Anders Carlssone7e7aa22007-08-17 05:31:46 +000063 }
64
Chris Lattner2e64c072007-08-10 20:18:51 +000065 // Search the KnownFunctionIDs for the identifier.
66 unsigned i = 0, e = id_num_known_functions;
Ted Kremenek081ed872007-08-14 17:39:48 +000067 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
Eli Friedman798e4d52008-05-16 17:51:27 +000068 if (i == e) return TheCall.take();
Chris Lattner2e64c072007-08-10 20:18:51 +000069
70 // Printf checking.
71 if (i <= id_vprintf) {
Ted Kremenek081ed872007-08-14 17:39:48 +000072 // Retrieve the index of the format string parameter and determine
73 // if the function is passed a va_arg argument.
Chris Lattner2e64c072007-08-10 20:18:51 +000074 unsigned format_idx = 0;
Ted Kremenek081ed872007-08-14 17:39:48 +000075 bool HasVAListArg = false;
76
Chris Lattner2e64c072007-08-10 20:18:51 +000077 switch (i) {
Chris Lattnerf22a8502007-12-19 23:59:04 +000078 default: assert(false && "No format string argument index.");
79 case id_printf: format_idx = 0; break;
80 case id_fprintf: format_idx = 1; break;
81 case id_sprintf: format_idx = 1; break;
82 case id_snprintf: format_idx = 2; break;
83 case id_asprintf: format_idx = 1; break;
84 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
85 case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
86 case id_vfprintf: format_idx = 1; HasVAListArg = true; break;
87 case id_vsprintf: format_idx = 1; HasVAListArg = true; break;
88 case id_vprintf: format_idx = 0; HasVAListArg = true; break;
Ted Kremenek081ed872007-08-14 17:39:48 +000089 }
90
Eli Friedman798e4d52008-05-16 17:51:27 +000091 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx);
Chris Lattner2e64c072007-08-10 20:18:51 +000092 }
Anders Carlssone7e7aa22007-08-17 05:31:46 +000093
Eli Friedman798e4d52008-05-16 17:51:27 +000094 return TheCall.take();
Anders Carlssone7e7aa22007-08-17 05:31:46 +000095}
96
97/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
98/// CFString constructor is correct
Chris Lattnerda050402007-08-25 05:30:33 +000099bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000100 Arg = Arg->IgnoreParenCasts();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000101
102 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
103
104 if (!Literal || Literal->isWide()) {
105 Diag(Arg->getLocStart(),
106 diag::err_cfstring_literal_not_string_constant,
107 Arg->getSourceRange());
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000108 return true;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000109 }
110
111 const char *Data = Literal->getStrData();
112 unsigned Length = Literal->getByteLength();
113
114 for (unsigned i = 0; i < Length; ++i) {
115 if (!isascii(Data[i])) {
116 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
117 diag::warn_cfstring_literal_contains_non_ascii_character,
118 Arg->getSourceRange());
119 break;
120 }
121
122 if (!Data[i]) {
123 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
124 diag::warn_cfstring_literal_contains_nul_character,
125 Arg->getSourceRange());
126 break;
127 }
128 }
129
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000130 return false;
Chris Lattner2e64c072007-08-10 20:18:51 +0000131}
132
Chris Lattner3b933692007-12-20 00:05:45 +0000133/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
134/// Emit an error and return true on failure, return false on success.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000135bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
136 Expr *Fn = TheCall->getCallee();
137 if (TheCall->getNumArgs() > 2) {
138 Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattnerf22a8502007-12-19 23:59:04 +0000139 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000140 SourceRange(TheCall->getArg(2)->getLocStart(),
141 (*(TheCall->arg_end()-1))->getLocEnd()));
Chris Lattnerf22a8502007-12-19 23:59:04 +0000142 return true;
143 }
144
Chris Lattner3b933692007-12-20 00:05:45 +0000145 // Determine whether the current function is variadic or not.
146 bool isVariadic;
Chris Lattnerf22a8502007-12-19 23:59:04 +0000147 if (CurFunctionDecl)
Chris Lattner3b933692007-12-20 00:05:45 +0000148 isVariadic =
149 cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic();
Chris Lattnerf22a8502007-12-19 23:59:04 +0000150 else
Chris Lattner3b933692007-12-20 00:05:45 +0000151 isVariadic = CurMethodDecl->isVariadic();
Chris Lattnerf22a8502007-12-19 23:59:04 +0000152
Chris Lattner3b933692007-12-20 00:05:45 +0000153 if (!isVariadic) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000154 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
155 return true;
156 }
157
158 // Verify that the second argument to the builtin is the last argument of the
159 // current function or method.
160 bool SecondArgIsLastNamedArgument = false;
Anders Carlsson924556e2008-02-13 01:22:59 +0000161 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
Anders Carlssonc27156b2008-02-11 04:20:54 +0000162
163 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
164 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000165 // FIXME: This isn't correct for methods (results in bogus warning).
166 // Get the last formal in the current function.
Anders Carlssonc27156b2008-02-11 04:20:54 +0000167 const ParmVarDecl *LastArg;
Chris Lattnerf22a8502007-12-19 23:59:04 +0000168 if (CurFunctionDecl)
169 LastArg = *(CurFunctionDecl->param_end()-1);
170 else
171 LastArg = *(CurMethodDecl->param_end()-1);
172 SecondArgIsLastNamedArgument = PV == LastArg;
173 }
174 }
175
176 if (!SecondArgIsLastNamedArgument)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000177 Diag(TheCall->getArg(1)->getLocStart(),
Chris Lattnerf22a8502007-12-19 23:59:04 +0000178 diag::warn_second_parameter_of_va_start_not_last_named_argument);
179 return false;
180}
181
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000182/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
183/// friends. This is declared to take (...), so we have to check everything.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000184bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
185 if (TheCall->getNumArgs() < 2)
186 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args);
187 if (TheCall->getNumArgs() > 2)
188 return Diag(TheCall->getArg(2)->getLocStart(),
189 diag::err_typecheck_call_too_many_args,
190 SourceRange(TheCall->getArg(2)->getLocStart(),
191 (*(TheCall->arg_end()-1))->getLocEnd()));
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000192
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000193 Expr *OrigArg0 = TheCall->getArg(0);
194 Expr *OrigArg1 = TheCall->getArg(1);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000195
196 // Do standard promotions between the two arguments, returning their common
197 // type.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000198 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000199
200 // If the common type isn't a real floating type, then the arguments were
201 // invalid for this operation.
202 if (!Res->isRealFloatingType())
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000203 return Diag(OrigArg0->getLocStart(),
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000204 diag::err_typecheck_call_invalid_ordered_compare,
205 OrigArg0->getType().getAsString(),
206 OrigArg1->getType().getAsString(),
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000207 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()));
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000208
209 return false;
210}
211
Eli Friedmand0e9d092008-05-14 19:38:39 +0000212/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
213// This is declared to take (...), so we have to check everything.
214Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
215 if (TheCall->getNumArgs() < 3)
216 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
217 TheCall->getSourceRange());
218
219 QualType FAType = TheCall->getArg(0)->getType();
220 QualType SAType = TheCall->getArg(1)->getType();
221
222 if (!FAType->isVectorType() || !SAType->isVectorType()) {
223 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector,
224 SourceRange(TheCall->getArg(0)->getLocStart(),
225 TheCall->getArg(1)->getLocEnd()));
Eli Friedmand0e9d092008-05-14 19:38:39 +0000226 return true;
227 }
228
Eli Friedmand38439f2008-05-16 17:54:49 +0000229 if (FAType.getCanonicalType().getUnqualifiedType() !=
230 SAType.getCanonicalType().getUnqualifiedType()) {
Eli Friedmand0e9d092008-05-14 19:38:39 +0000231 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector,
232 SourceRange(TheCall->getArg(0)->getLocStart(),
233 TheCall->getArg(1)->getLocEnd()));
Eli Friedmand0e9d092008-05-14 19:38:39 +0000234 return true;
235 }
236
237 unsigned numElements = FAType->getAsVectorType()->getNumElements();
238 if (TheCall->getNumArgs() != numElements+2) {
239 if (TheCall->getNumArgs() < numElements+2)
240 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
241 TheCall->getSourceRange());
242 else
243 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args,
244 TheCall->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000245 return true;
246 }
247
248 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
249 llvm::APSInt Result(32);
250 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) {
251 Diag(TheCall->getLocStart(),
252 diag::err_shufflevector_nonconstant_argument,
253 TheCall->getArg(i)->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000254 return true;
255 }
256 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) {
257 Diag(TheCall->getLocStart(),
258 diag::err_shufflevector_argument_too_large,
259 TheCall->getArg(i)->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000260 return true;
261 }
262 }
263
264 llvm::SmallVector<Expr*, 32> exprs;
265
266 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) {
267 exprs.push_back(TheCall->getArg(i));
268 TheCall->setArg(i, 0);
269 }
270
271 ShuffleVectorExpr* E = new ShuffleVectorExpr(
272 exprs.begin(), numElements+2, FAType,
273 TheCall->getCallee()->getLocStart(),
274 TheCall->getRParenLoc());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000275
276 return E;
277}
Chris Lattnerf22a8502007-12-19 23:59:04 +0000278
Chris Lattner2e64c072007-08-10 20:18:51 +0000279/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek081ed872007-08-14 17:39:48 +0000280/// correct use of format strings.
281///
282/// HasVAListArg - A predicate indicating whether the printf-like
283/// function is passed an explicit va_arg argument (e.g., vprintf)
284///
285/// format_idx - The index into Args for the format string.
286///
287/// Improper format strings to functions in the printf family can be
288/// the source of bizarre bugs and very serious security holes. A
289/// good source of information is available in the following paper
290/// (which includes additional references):
Chris Lattner2e64c072007-08-10 20:18:51 +0000291///
292/// FormatGuard: Automatic Protection From printf Format String
293/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek081ed872007-08-14 17:39:48 +0000294///
295/// Functionality implemented:
296///
297/// We can statically check the following properties for string
298/// literal format strings for non v.*printf functions (where the
299/// arguments are passed directly):
300//
301/// (1) Are the number of format conversions equal to the number of
302/// data arguments?
303///
304/// (2) Does each format conversion correctly match the type of the
305/// corresponding data argument? (TODO)
306///
307/// Moreover, for all printf functions we can:
308///
309/// (3) Check for a missing format string (when not caught by type checking).
310///
311/// (4) Check for no-operation flags; e.g. using "#" with format
312/// conversion 'c' (TODO)
313///
314/// (5) Check the use of '%n', a major source of security holes.
315///
316/// (6) Check for malformed format conversions that don't specify anything.
317///
318/// (7) Check for empty format strings. e.g: printf("");
319///
320/// (8) Check that the format string is a wide literal.
321///
Ted Kremenekc2804c22008-03-03 16:50:00 +0000322/// (9) Also check the arguments of functions with the __format__ attribute.
323/// (TODO).
324///
Ted Kremenek081ed872007-08-14 17:39:48 +0000325/// All of these checks can be done by parsing the format string.
326///
327/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner2e64c072007-08-10 20:18:51 +0000328void
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000329Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
330 unsigned format_idx) {
331 Expr *Fn = TheCall->getCallee();
332
Ted Kremenek081ed872007-08-14 17:39:48 +0000333 // CHECK: printf-like function is called with no format string.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000334 if (format_idx >= TheCall->getNumArgs()) {
335 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string,
Ted Kremenek081ed872007-08-14 17:39:48 +0000336 Fn->getSourceRange());
337 return;
338 }
339
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000340 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
Chris Lattnere65acc12007-08-25 05:36:18 +0000341
Chris Lattner2e64c072007-08-10 20:18:51 +0000342 // CHECK: format string is not a string literal.
343 //
Ted Kremenek081ed872007-08-14 17:39:48 +0000344 // Dynamically generated format strings are difficult to
345 // automatically vet at compile time. Requiring that format strings
346 // are string literals: (1) permits the checking of format strings by
347 // the compiler and thereby (2) can practically remove the source of
348 // many format string exploits.
Chris Lattnere65acc12007-08-25 05:36:18 +0000349 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
Ted Kremenek081ed872007-08-14 17:39:48 +0000350 if (FExpr == NULL) {
Ted Kremenek19398b62007-12-17 19:03:13 +0000351 // For vprintf* functions (i.e., HasVAListArg==true), we add a
352 // special check to see if the format string is a function parameter
353 // of the function calling the printf function. If the function
354 // has an attribute indicating it is a printf-like function, then we
355 // should suppress warnings concerning non-literals being used in a call
356 // to a vprintf function. For example:
357 //
358 // void
359 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
360 // va_list ap;
361 // va_start(ap, fmt);
362 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
363 // ...
364 //
365 //
366 // FIXME: We don't have full attribute support yet, so just check to see
367 // if the argument is a DeclRefExpr that references a parameter. We'll
368 // add proper support for checking the attribute later.
369 if (HasVAListArg)
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000370 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
371 if (isa<ParmVarDecl>(DR->getDecl()))
Ted Kremenek19398b62007-12-17 19:03:13 +0000372 return;
373
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000374 Diag(TheCall->getArg(format_idx)->getLocStart(),
375 diag::warn_printf_not_string_constant, Fn->getSourceRange());
Ted Kremenek081ed872007-08-14 17:39:48 +0000376 return;
377 }
378
379 // CHECK: is the format string a wide literal?
380 if (FExpr->isWide()) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000381 Diag(FExpr->getLocStart(),
382 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange());
Ted Kremenek081ed872007-08-14 17:39:48 +0000383 return;
384 }
385
386 // Str - The format string. NOTE: this is NOT null-terminated!
387 const char * const Str = FExpr->getStrData();
388
389 // CHECK: empty format string?
390 const unsigned StrLen = FExpr->getByteLength();
391
392 if (StrLen == 0) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000393 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string,
394 Fn->getSourceRange());
Ted Kremenek081ed872007-08-14 17:39:48 +0000395 return;
396 }
397
398 // We process the format string using a binary state machine. The
399 // current state is stored in CurrentState.
400 enum {
401 state_OrdChr,
402 state_Conversion
403 } CurrentState = state_OrdChr;
404
405 // numConversions - The number of conversions seen so far. This is
406 // incremented as we traverse the format string.
407 unsigned numConversions = 0;
408
409 // numDataArgs - The number of data arguments after the format
410 // string. This can only be determined for non vprintf-like
411 // functions. For those functions, this value is 1 (the sole
412 // va_arg argument).
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000413 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
Ted Kremenek081ed872007-08-14 17:39:48 +0000414
415 // Inspect the format string.
416 unsigned StrIdx = 0;
417
418 // LastConversionIdx - Index within the format string where we last saw
419 // a '%' character that starts a new format conversion.
420 unsigned LastConversionIdx = 0;
421
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000422 for (; StrIdx < StrLen; ++StrIdx) {
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000423
Ted Kremenek081ed872007-08-14 17:39:48 +0000424 // Is the number of detected conversion conversions greater than
425 // the number of matching data arguments? If so, stop.
426 if (!HasVAListArg && numConversions > numDataArgs) break;
427
428 // Handle "\0"
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000429 if (Str[StrIdx] == '\0') {
Ted Kremenek081ed872007-08-14 17:39:48 +0000430 // The string returned by getStrData() is not null-terminated,
431 // so the presence of a null character is likely an error.
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000432 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
433 diag::warn_printf_format_string_contains_null_char,
Ted Kremenek081ed872007-08-14 17:39:48 +0000434 Fn->getSourceRange());
Ted Kremenek081ed872007-08-14 17:39:48 +0000435 return;
436 }
437
438 // Ordinary characters (not processing a format conversion).
439 if (CurrentState == state_OrdChr) {
440 if (Str[StrIdx] == '%') {
441 CurrentState = state_Conversion;
442 LastConversionIdx = StrIdx;
443 }
444 continue;
445 }
446
447 // Seen '%'. Now processing a format conversion.
448 switch (Str[StrIdx]) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000449 // Handle dynamic precision or width specifier.
450 case '*': {
451 ++numConversions;
452
453 if (!HasVAListArg && numConversions > numDataArgs) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000454 SourceLocation Loc = FExpr->getLocStart();
455 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
Ted Kremenek035d8792007-10-12 20:51:52 +0000456
Ted Kremenek035d8792007-10-12 20:51:52 +0000457 if (Str[StrIdx-1] == '.')
Chris Lattner68d88f02007-12-28 05:31:15 +0000458 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
459 Fn->getSourceRange());
Ted Kremenek035d8792007-10-12 20:51:52 +0000460 else
Chris Lattner68d88f02007-12-28 05:31:15 +0000461 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
462 Fn->getSourceRange());
Ted Kremenek035d8792007-10-12 20:51:52 +0000463
Chris Lattner68d88f02007-12-28 05:31:15 +0000464 // Don't do any more checking. We'll just emit spurious errors.
465 return;
Ted Kremenek035d8792007-10-12 20:51:52 +0000466 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000467
468 // Perform type checking on width/precision specifier.
469 Expr *E = TheCall->getArg(format_idx+numConversions);
470 if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
471 if (BT->getKind() == BuiltinType::Int)
472 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000473
Chris Lattner68d88f02007-12-28 05:31:15 +0000474 SourceLocation Loc =
475 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
476
477 if (Str[StrIdx-1] == '.')
478 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
479 E->getType().getAsString(), E->getSourceRange());
480 else
481 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
482 E->getType().getAsString(), E->getSourceRange());
483
484 break;
485 }
486
487 // Characters which can terminate a format conversion
488 // (e.g. "%d"). Characters that specify length modifiers or
489 // other flags are handled by the default case below.
490 //
491 // FIXME: additional checks will go into the following cases.
492 case 'i':
493 case 'd':
494 case 'o':
495 case 'u':
496 case 'x':
497 case 'X':
498 case 'D':
499 case 'O':
500 case 'U':
501 case 'e':
502 case 'E':
503 case 'f':
504 case 'F':
505 case 'g':
506 case 'G':
507 case 'a':
508 case 'A':
509 case 'c':
510 case 'C':
511 case 'S':
512 case 's':
513 case 'p':
514 ++numConversions;
515 CurrentState = state_OrdChr;
516 break;
517
518 // CHECK: Are we using "%n"? Issue a warning.
519 case 'n': {
520 ++numConversions;
521 CurrentState = state_OrdChr;
522 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
523 LastConversionIdx+1);
524
525 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
526 break;
527 }
528
529 // Handle "%%"
530 case '%':
531 // Sanity check: Was the first "%" character the previous one?
532 // If not, we will assume that we have a malformed format
533 // conversion, and that the current "%" character is the start
534 // of a new conversion.
535 if (StrIdx - LastConversionIdx == 1)
536 CurrentState = state_OrdChr;
537 else {
538 // Issue a warning: invalid format conversion.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000539 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
540 LastConversionIdx+1);
Chris Lattner68d88f02007-12-28 05:31:15 +0000541
542 Diag(Loc, diag::warn_printf_invalid_conversion,
543 std::string(Str+LastConversionIdx, Str+StrIdx),
544 Fn->getSourceRange());
545
546 // This conversion is broken. Advance to the next format
547 // conversion.
548 LastConversionIdx = StrIdx;
549 ++numConversions;
Ted Kremenek081ed872007-08-14 17:39:48 +0000550 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000551 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000552
Chris Lattner68d88f02007-12-28 05:31:15 +0000553 default:
554 // This case catches all other characters: flags, widths, etc.
555 // We should eventually process those as well.
556 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000557 }
558 }
559
560 if (CurrentState == state_Conversion) {
561 // Issue a warning: invalid format conversion.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000562 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
563 LastConversionIdx+1);
Ted Kremenek081ed872007-08-14 17:39:48 +0000564
565 Diag(Loc, diag::warn_printf_invalid_conversion,
Chris Lattner6f65d202007-08-26 17:38:22 +0000566 std::string(Str+LastConversionIdx,
567 Str+std::min(LastConversionIdx+2, StrLen)),
Ted Kremenek081ed872007-08-14 17:39:48 +0000568 Fn->getSourceRange());
569 return;
570 }
571
572 if (!HasVAListArg) {
573 // CHECK: Does the number of format conversions exceed the number
574 // of data arguments?
575 if (numConversions > numDataArgs) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000576 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
577 LastConversionIdx);
Ted Kremenek081ed872007-08-14 17:39:48 +0000578
579 Diag(Loc, diag::warn_printf_insufficient_data_args,
580 Fn->getSourceRange());
581 }
582 // CHECK: Does the number of data arguments exceed the number of
583 // format conversions in the format string?
584 else if (numConversions < numDataArgs)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000585 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
Ted Kremenek081ed872007-08-14 17:39:48 +0000586 diag::warn_printf_too_many_data_args, Fn->getSourceRange());
587 }
588}
Ted Kremenek45925ab2007-08-17 16:46:58 +0000589
590//===--- CHECK: Return Address of Stack Variable --------------------------===//
591
592static DeclRefExpr* EvalVal(Expr *E);
593static DeclRefExpr* EvalAddr(Expr* E);
594
595/// CheckReturnStackAddr - Check if a return statement returns the address
596/// of a stack variable.
597void
598Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
599 SourceLocation ReturnLoc) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000600
Ted Kremenek45925ab2007-08-17 16:46:58 +0000601 // Perform checking for returned stack addresses.
602 if (lhsType->isPointerType()) {
603 if (DeclRefExpr *DR = EvalAddr(RetValExp))
604 Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
605 DR->getDecl()->getIdentifier()->getName(),
606 RetValExp->getSourceRange());
607 }
608 // Perform checking for stack values returned by reference.
609 else if (lhsType->isReferenceType()) {
Ted Kremenek1456f202007-08-27 16:39:17 +0000610 // Check for an implicit cast to a reference.
611 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
612 if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
613 Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
614 DR->getDecl()->getIdentifier()->getName(),
615 RetValExp->getSourceRange());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000616 }
617}
618
619/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
620/// check if the expression in a return statement evaluates to an address
621/// to a location on the stack. The recursion is used to traverse the
622/// AST of the return expression, with recursion backtracking when we
623/// encounter a subexpression that (1) clearly does not lead to the address
624/// of a stack variable or (2) is something we cannot determine leads to
625/// the address of a stack variable based on such local checking.
626///
Ted Kremenekda1300a2007-08-28 17:02:55 +0000627/// EvalAddr processes expressions that are pointers that are used as
628/// references (and not L-values). EvalVal handles all other values.
/// At the base case of the recursion is a check for a DeclRefExpr* that
/// refers to a stack variable.
631///
632/// This implementation handles:
633///
634/// * pointer-to-pointer casts
635/// * implicit conversions from array references to pointers
636/// * taking the address of fields
637/// * arbitrary interplay between "&" and "*" operators
638/// * pointer arithmetic from an address of a stack variable
639/// * taking the address of an array element where the array is on the stack
640static DeclRefExpr* EvalAddr(Expr *E) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000641 // We should only be called for evaluating pointer expressions.
Chris Lattner68d88f02007-12-28 05:31:15 +0000642 assert((E->getType()->isPointerType() ||
Ted Kremenek42730c52008-01-07 19:49:32 +0000643 E->getType()->isObjCQualifiedIdType()) &&
Chris Lattner68d88f02007-12-28 05:31:15 +0000644 "EvalAddr only works on pointers");
Ted Kremenek45925ab2007-08-17 16:46:58 +0000645
646 // Our "symbolic interpreter" is just a dispatch off the currently
647 // viewed AST node. We then recursively traverse the AST by calling
648 // EvalAddr and EvalVal appropriately.
649 switch (E->getStmtClass()) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000650 case Stmt::ParenExprClass:
651 // Ignore parentheses.
652 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000653
Chris Lattner68d88f02007-12-28 05:31:15 +0000654 case Stmt::UnaryOperatorClass: {
655 // The only unary operator that make sense to handle here
656 // is AddrOf. All others don't make sense as pointers.
657 UnaryOperator *U = cast<UnaryOperator>(E);
Ted Kremenek45925ab2007-08-17 16:46:58 +0000658
Chris Lattner68d88f02007-12-28 05:31:15 +0000659 if (U->getOpcode() == UnaryOperator::AddrOf)
660 return EvalVal(U->getSubExpr());
661 else
Ted Kremenek45925ab2007-08-17 16:46:58 +0000662 return NULL;
663 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000664
665 case Stmt::BinaryOperatorClass: {
666 // Handle pointer arithmetic. All other binary operators are not valid
667 // in this context.
668 BinaryOperator *B = cast<BinaryOperator>(E);
669 BinaryOperator::Opcode op = B->getOpcode();
670
671 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
672 return NULL;
673
674 Expr *Base = B->getLHS();
675
676 // Determine which argument is the real pointer base. It could be
677 // the RHS argument instead of the LHS.
678 if (!Base->getType()->isPointerType()) Base = B->getRHS();
679
680 assert (Base->getType()->isPointerType());
681 return EvalAddr(Base);
682 }
683
684 // For conditional operators we need to see if either the LHS or RHS are
685 // valid DeclRefExpr*s. If one of them is valid, we return it.
686 case Stmt::ConditionalOperatorClass: {
687 ConditionalOperator *C = cast<ConditionalOperator>(E);
688
689 // Handle the GNU extension for missing LHS.
690 if (Expr *lhsExpr = C->getLHS())
691 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
692 return LHS;
693
694 return EvalAddr(C->getRHS());
695 }
696
697 // For implicit casts, we need to handle conversions from arrays to
698 // pointer values, and implicit pointer-to-pointer conversions.
699 case Stmt::ImplicitCastExprClass: {
700 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
701 Expr* SubExpr = IE->getSubExpr();
702
703 if (SubExpr->getType()->isPointerType() ||
Ted Kremenek42730c52008-01-07 19:49:32 +0000704 SubExpr->getType()->isObjCQualifiedIdType())
Chris Lattner68d88f02007-12-28 05:31:15 +0000705 return EvalAddr(SubExpr);
706 else
707 return EvalVal(SubExpr);
708 }
709
710 // For casts, we handle pointer-to-pointer conversions (which
711 // is essentially a no-op from our mini-interpreter's standpoint).
712 // For other casts we abort.
713 case Stmt::CastExprClass: {
714 CastExpr *C = cast<CastExpr>(E);
715 Expr *SubExpr = C->getSubExpr();
716
717 if (SubExpr->getType()->isPointerType())
718 return EvalAddr(SubExpr);
719 else
720 return NULL;
721 }
722
723 // C++ casts. For dynamic casts, static casts, and const casts, we
724 // are always converting from a pointer-to-pointer, so we just blow
725 // through the cast. In the case the dynamic cast doesn't fail
726 // (and return NULL), we take the conservative route and report cases
727 // where we return the address of a stack variable. For Reinterpre
728 case Stmt::CXXCastExprClass: {
729 CXXCastExpr *C = cast<CXXCastExpr>(E);
730
731 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
732 Expr *S = C->getSubExpr();
733 if (S->getType()->isPointerType())
734 return EvalAddr(S);
735 else
736 return NULL;
737 }
738 else
739 return EvalAddr(C->getSubExpr());
740 }
741
742 // Everything else: we simply don't reason about them.
743 default:
744 return NULL;
745 }
Ted Kremenek45925ab2007-08-17 16:46:58 +0000746}
747
748
749/// EvalVal - This function is complements EvalAddr in the mutual recursion.
750/// See the comments for EvalAddr for more details.
751static DeclRefExpr* EvalVal(Expr *E) {
752
Ted Kremenekda1300a2007-08-28 17:02:55 +0000753 // We should only be called for evaluating non-pointer expressions, or
754 // expressions with a pointer type that are not used as references but instead
755 // are l-values (e.g., DeclRefExpr with a pointer type).
756
Ted Kremenek45925ab2007-08-17 16:46:58 +0000757 // Our "symbolic interpreter" is just a dispatch off the currently
758 // viewed AST node. We then recursively traverse the AST by calling
759 // EvalAddr and EvalVal appropriately.
760 switch (E->getStmtClass()) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000761 case Stmt::DeclRefExprClass: {
762 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
763 // at code that refers to a variable's name. We check if it has local
764 // storage within the function, and if so, return the expression.
765 DeclRefExpr *DR = cast<DeclRefExpr>(E);
766
767 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
768 if(V->hasLocalStorage()) return DR;
769
770 return NULL;
771 }
772
773 case Stmt::ParenExprClass:
774 // Ignore parentheses.
775 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
776
777 case Stmt::UnaryOperatorClass: {
778 // The only unary operator that make sense to handle here
779 // is Deref. All others don't resolve to a "name." This includes
780 // handling all sorts of rvalues passed to a unary operator.
781 UnaryOperator *U = cast<UnaryOperator>(E);
782
783 if (U->getOpcode() == UnaryOperator::Deref)
784 return EvalAddr(U->getSubExpr());
785
786 return NULL;
787 }
788
789 case Stmt::ArraySubscriptExprClass: {
790 // Array subscripts are potential references to data on the stack. We
791 // retrieve the DeclRefExpr* for the array variable if it indeed
792 // has local storage.
Ted Kremenek1c1700f2007-08-20 16:18:38 +0000793 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000794 }
795
796 case Stmt::ConditionalOperatorClass: {
797 // For conditional operators we need to see if either the LHS or RHS are
798 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
799 ConditionalOperator *C = cast<ConditionalOperator>(E);
800
Anders Carlsson37365fc2007-11-30 19:04:31 +0000801 // Handle the GNU extension for missing LHS.
802 if (Expr *lhsExpr = C->getLHS())
803 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
804 return LHS;
805
806 return EvalVal(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000807 }
808
809 // Accesses to members are potential references to data on the stack.
810 case Stmt::MemberExprClass: {
811 MemberExpr *M = cast<MemberExpr>(E);
812
813 // Check for indirect access. We only want direct field accesses.
814 if (!M->isArrow())
815 return EvalVal(M->getBase());
816 else
817 return NULL;
818 }
819
820 // Everything else: we simply don't reason about them.
821 default:
822 return NULL;
823 }
824}
Ted Kremenek30c66752007-11-25 00:58:00 +0000825
826//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
827
828/// Check for comparisons of floating point operands using != and ==.
829/// Issue a warning if these are no self-comparisons, as they are not likely
830/// to do what the programmer intended.
831void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
832 bool EmitWarning = true;
833
Ted Kremenek87e30c52008-01-17 16:57:34 +0000834 Expr* LeftExprSansParen = lex->IgnoreParens();
Ted Kremenek24c61682008-01-17 17:55:13 +0000835 Expr* RightExprSansParen = rex->IgnoreParens();
Ted Kremenek30c66752007-11-25 00:58:00 +0000836
837 // Special case: check for x == x (which is OK).
838 // Do not emit warnings for such cases.
839 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
840 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
841 if (DRL->getDecl() == DRR->getDecl())
842 EmitWarning = false;
843
Ted Kremenek33159832007-11-29 00:59:04 +0000844
845 // Special case: check for comparisons against literals that can be exactly
846 // represented by APFloat. In such cases, do not emit a warning. This
847 // is a heuristic: often comparison against such literals are used to
848 // detect if a value in a variable has not changed. This clearly can
849 // lead to false negatives.
850 if (EmitWarning) {
851 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
852 if (FLL->isExact())
853 EmitWarning = false;
854 }
855 else
856 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
857 if (FLR->isExact())
858 EmitWarning = false;
859 }
860 }
861
Ted Kremenek30c66752007-11-25 00:58:00 +0000862 // Check for comparisons with builtin types.
863 if (EmitWarning)
864 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
865 if (isCallBuiltin(CL))
866 EmitWarning = false;
867
868 if (EmitWarning)
869 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
870 if (isCallBuiltin(CR))
871 EmitWarning = false;
872
873 // Emit the diagnostic.
874 if (EmitWarning)
875 Diag(loc, diag::warn_floatingpoint_eq,
876 lex->getSourceRange(),rex->getSourceRange());
877}