blob: 818a892617e1ce48a278b9748897b9312160f07b [file] [log] [blame]
Chris Lattner2e64c072007-08-10 20:18:51 +00001//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner959e5be2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner2e64c072007-08-10 20:18:51 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements extra semantic analysis beyond what is enforced
11// by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
Daniel Dunbar64789f82008-08-11 05:35:13 +000017#include "clang/AST/DeclObjC.h"
Ted Kremenek1c1700f2007-08-20 16:18:38 +000018#include "clang/AST/ExprCXX.h"
Ted Kremenek225a14c2008-06-16 18:00:42 +000019#include "clang/AST/ExprObjC.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000020#include "clang/Lex/Preprocessor.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000021using namespace clang;
22
Chris Lattnerf17cb362009-02-18 17:49:48 +000023/// getLocationOfStringLiteralByte - Return a source location that points to the
24/// specified byte of the specified string literal.
25///
26/// Strings are amazingly complex. They can be formed from multiple tokens and
27/// can have escape sequences in them in addition to the usual trigraph and
28/// escaped newline business. This routine handles this complexity.
29///
30SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
31 unsigned ByteNo) const {
32 assert(!SL->isWide() && "This doesn't work for wide strings yet");
33
Chris Lattner30183b02009-02-18 18:34:12 +000034 llvm::SmallString<32> SpellingBuffer;
35
Chris Lattnerf17cb362009-02-18 17:49:48 +000036 // Loop over all of the tokens in this string until we find the one that
37 // contains the byte we're looking for.
38 unsigned TokNo = 0;
39 while (1) {
40 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
41 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
42
43 // Get the spelling of the string so that we can get the data that makes up
44 // the string literal, not the identifier for the macro it is potentially
45 // expanded through.
46 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
47
48 // Re-lex the token to get its length and original spelling.
49 std::pair<FileID, unsigned> LocInfo =
50 SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
51 std::pair<const char *,const char *> Buffer =
52 SourceMgr.getBufferData(LocInfo.first);
53 const char *StrData = Buffer.first+LocInfo.second;
54
55 // Create a langops struct and enable trigraphs. This is sufficient for
56 // relexing tokens.
57 LangOptions LangOpts;
58 LangOpts.Trigraphs = true;
59
60 // Create a lexer starting at the beginning of this token.
61 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
62 Buffer.second);
63 Token TheTok;
64 TheLexer.LexFromRawLexer(TheTok);
65
Chris Lattner4ec7a052009-02-18 18:40:20 +000066 // We generally care about the length of the token, which is known by the
67 // lexer as long as we don't need to clean it (trigraphs/newlines).
68 unsigned TokNumBytes;
69 if (!TheTok.needsCleaning()) {
70 TokNumBytes = TheTok.getLength();
71 } else {
72 // Get the spelling of the token to remove trigraphs and escaped newlines.
73 SpellingBuffer.resize(TheTok.getLength());
74 const char *SpellingPtr = &SpellingBuffer[0];
75 TokNumBytes = PP.getSpelling(TheTok, SpellingPtr);
76 }
Chris Lattner30183b02009-02-18 18:34:12 +000077
Chris Lattnerf17cb362009-02-18 17:49:48 +000078 // The length of the string is the token length minus the two quotes.
Chris Lattner4ec7a052009-02-18 18:40:20 +000079 TokNumBytes -= 2;
Chris Lattnerf17cb362009-02-18 17:49:48 +000080
81 // If we found the token we're looking for, return the location.
82 // FIXME: This should consider character escapes!
83 if (ByteNo < TokNumBytes ||
84 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
85 // If the original token came from a macro expansion, just return the
86 // start of the token. We don't want to magically jump to the spelling
87 // for a diagnostic. We do the above business in case some tokens come
88 // from a macro expansion but others don't.
89 if (!StrTokLoc.isFileID()) return StrTokLoc;
90
91 // We advance +1 to step over the '"'.
92 return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
93 }
94
95 // Move to the next string token.
96 ++TokNo;
97 ByteNo -= TokNumBytes;
98 }
99}
100
101
Chris Lattner2e64c072007-08-10 20:18:51 +0000102/// CheckFunctionCall - Check a direct function call for various correctness
103/// and safety properties not strictly enforced by the C type system.
Sebastian Redl8b769972009-01-19 00:08:26 +0000104Action::OwningExprResult
105Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
106 OwningExprResult TheCallResult(Owned(TheCall));
Chris Lattner2e64c072007-08-10 20:18:51 +0000107 // Get the IdentifierInfo* for the called function.
108 IdentifierInfo *FnInfo = FDecl->getIdentifier();
Douglas Gregorb0212bd2008-11-17 20:34:05 +0000109
110 // None of the checks below are needed for functions that don't have
111 // simple names (e.g., C++ conversion functions).
112 if (!FnInfo)
Sebastian Redl8b769972009-01-19 00:08:26 +0000113 return move(TheCallResult);
Douglas Gregorb0212bd2008-11-17 20:34:05 +0000114
Douglas Gregorb5af7382009-02-14 18:57:46 +0000115 switch (FDecl->getBuiltinID(Context)) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000116 case Builtin::BI__builtin___CFStringMakeConstantString:
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000117 assert(TheCall->getNumArgs() == 1 &&
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000118 "Wrong # arguments to builtin CFStringMakeConstantString");
Chris Lattner81f5be22009-02-18 06:01:06 +0000119 if (CheckObjCString(TheCall->getArg(0)))
Sebastian Redl8b769972009-01-19 00:08:26 +0000120 return ExprError();
121 return move(TheCallResult);
Ted Kremenek7a0654c2008-07-09 17:58:53 +0000122 case Builtin::BI__builtin_stdarg_start:
Chris Lattnerf22a8502007-12-19 23:59:04 +0000123 case Builtin::BI__builtin_va_start:
Sebastian Redl8b769972009-01-19 00:08:26 +0000124 if (SemaBuiltinVAStart(TheCall))
125 return ExprError();
126 return move(TheCallResult);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000127 case Builtin::BI__builtin_isgreater:
128 case Builtin::BI__builtin_isgreaterequal:
129 case Builtin::BI__builtin_isless:
130 case Builtin::BI__builtin_islessequal:
131 case Builtin::BI__builtin_islessgreater:
132 case Builtin::BI__builtin_isunordered:
Sebastian Redl8b769972009-01-19 00:08:26 +0000133 if (SemaBuiltinUnorderedCompare(TheCall))
134 return ExprError();
135 return move(TheCallResult);
Eli Friedman8c50c622008-05-20 08:23:37 +0000136 case Builtin::BI__builtin_return_address:
137 case Builtin::BI__builtin_frame_address:
Sebastian Redl8b769972009-01-19 00:08:26 +0000138 if (SemaBuiltinStackAddress(TheCall))
139 return ExprError();
140 return move(TheCallResult);
Eli Friedmand0e9d092008-05-14 19:38:39 +0000141 case Builtin::BI__builtin_shufflevector:
Sebastian Redl8b769972009-01-19 00:08:26 +0000142 return SemaBuiltinShuffleVector(TheCall);
143 // TheCall will be freed by the smart pointer here, but that's fine, since
144 // SemaBuiltinShuffleVector guts it, but then doesn't release it.
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000145 case Builtin::BI__builtin_prefetch:
Sebastian Redl8b769972009-01-19 00:08:26 +0000146 if (SemaBuiltinPrefetch(TheCall))
147 return ExprError();
148 return move(TheCallResult);
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000149 case Builtin::BI__builtin_object_size:
Sebastian Redl8b769972009-01-19 00:08:26 +0000150 if (SemaBuiltinObjectSize(TheCall))
151 return ExprError();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000152 }
Daniel Dunbar0ab03e62008-10-02 18:44:07 +0000153
154 // FIXME: This mechanism should be abstracted to be less fragile and
155 // more efficient. For example, just map function ids to custom
156 // handlers.
157
Chris Lattner2e64c072007-08-10 20:18:51 +0000158 // Printf checking.
Douglas Gregorb5af7382009-02-14 18:57:46 +0000159 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
160 if (Format->getType() == "printf") {
161 bool HasVAListArg = false;
162 if (const FunctionTypeProto *Proto
163 = FDecl->getType()->getAsFunctionTypeProto())
164 HasVAListArg = !Proto->isVariadic();
165 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
166 Format->getFirstArg() - 1);
167 }
Chris Lattner2e64c072007-08-10 20:18:51 +0000168 }
Sebastian Redl8b769972009-01-19 00:08:26 +0000169
170 return move(TheCallResult);
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000171}
172
Chris Lattner81f5be22009-02-18 06:01:06 +0000173/// CheckObjCString - Checks that the argument to the builtin
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000174/// CFString constructor is correct
Chris Lattner81f5be22009-02-18 06:01:06 +0000175bool Sema::CheckObjCString(Expr *Arg) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000176 Arg = Arg->IgnoreParenCasts();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000177 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
178
179 if (!Literal || Literal->isWide()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000180 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
181 << Arg->getSourceRange();
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000182 return true;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000183 }
184
185 const char *Data = Literal->getStrData();
186 unsigned Length = Literal->getByteLength();
187
188 for (unsigned i = 0; i < Length; ++i) {
189 if (!isascii(Data[i])) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000190 Diag(getLocationOfStringLiteralByte(Literal, i),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000191 diag::warn_cfstring_literal_contains_non_ascii_character)
192 << Arg->getSourceRange();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000193 break;
194 }
195
196 if (!Data[i]) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000197 Diag(getLocationOfStringLiteralByte(Literal, i),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000198 diag::warn_cfstring_literal_contains_nul_character)
199 << Arg->getSourceRange();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000200 break;
201 }
202 }
203
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000204 return false;
Chris Lattner2e64c072007-08-10 20:18:51 +0000205}
206
Chris Lattner3b933692007-12-20 00:05:45 +0000207/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
208/// Emit an error and return true on failure, return false on success.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000209bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
210 Expr *Fn = TheCall->getCallee();
211 if (TheCall->getNumArgs() > 2) {
Chris Lattner66beaba2008-11-21 18:44:24 +0000212 Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000213 diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000214 << 0 /*function call*/ << Fn->getSourceRange()
Chris Lattner8ba580c2008-11-19 05:08:23 +0000215 << SourceRange(TheCall->getArg(2)->getLocStart(),
216 (*(TheCall->arg_end()-1))->getLocEnd());
Chris Lattnerf22a8502007-12-19 23:59:04 +0000217 return true;
218 }
Eli Friedman6422de62008-12-15 22:05:35 +0000219
220 if (TheCall->getNumArgs() < 2) {
221 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
222 << 0 /*function call*/;
223 }
224
Chris Lattner3b933692007-12-20 00:05:45 +0000225 // Determine whether the current function is variadic or not.
226 bool isVariadic;
Eli Friedman6422de62008-12-15 22:05:35 +0000227 if (getCurFunctionDecl()) {
228 if (FunctionTypeProto* FTP =
229 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType()))
230 isVariadic = FTP->isVariadic();
231 else
232 isVariadic = false;
233 } else {
Argiris Kirtzidis95256e62008-06-28 06:07:14 +0000234 isVariadic = getCurMethodDecl()->isVariadic();
Eli Friedman6422de62008-12-15 22:05:35 +0000235 }
Chris Lattnerf22a8502007-12-19 23:59:04 +0000236
Chris Lattner3b933692007-12-20 00:05:45 +0000237 if (!isVariadic) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000238 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
239 return true;
240 }
241
242 // Verify that the second argument to the builtin is the last argument of the
243 // current function or method.
244 bool SecondArgIsLastNamedArgument = false;
Anders Carlsson924556e2008-02-13 01:22:59 +0000245 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
Anders Carlssonc27156b2008-02-11 04:20:54 +0000246
247 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
248 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000249 // FIXME: This isn't correct for methods (results in bogus warning).
250 // Get the last formal in the current function.
Anders Carlssonc27156b2008-02-11 04:20:54 +0000251 const ParmVarDecl *LastArg;
Chris Lattnere5cb5862008-12-04 23:50:19 +0000252 if (FunctionDecl *FD = getCurFunctionDecl())
253 LastArg = *(FD->param_end()-1);
Chris Lattnerf22a8502007-12-19 23:59:04 +0000254 else
Argiris Kirtzidis95256e62008-06-28 06:07:14 +0000255 LastArg = *(getCurMethodDecl()->param_end()-1);
Chris Lattnerf22a8502007-12-19 23:59:04 +0000256 SecondArgIsLastNamedArgument = PV == LastArg;
257 }
258 }
259
260 if (!SecondArgIsLastNamedArgument)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000261 Diag(TheCall->getArg(1)->getLocStart(),
Chris Lattnerf22a8502007-12-19 23:59:04 +0000262 diag::warn_second_parameter_of_va_start_not_last_named_argument);
263 return false;
Eli Friedman8c50c622008-05-20 08:23:37 +0000264}
Chris Lattnerf22a8502007-12-19 23:59:04 +0000265
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000266/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
267/// friends. This is declared to take (...), so we have to check everything.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000268bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
269 if (TheCall->getNumArgs() < 2)
Chris Lattner66beaba2008-11-21 18:44:24 +0000270 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
271 << 0 /*function call*/;
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000272 if (TheCall->getNumArgs() > 2)
273 return Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000274 diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000275 << 0 /*function call*/
Chris Lattner8ba580c2008-11-19 05:08:23 +0000276 << SourceRange(TheCall->getArg(2)->getLocStart(),
277 (*(TheCall->arg_end()-1))->getLocEnd());
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000278
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000279 Expr *OrigArg0 = TheCall->getArg(0);
280 Expr *OrigArg1 = TheCall->getArg(1);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000281
282 // Do standard promotions between the two arguments, returning their common
283 // type.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000284 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000285
286 // If the common type isn't a real floating type, then the arguments were
287 // invalid for this operation.
288 if (!Res->isRealFloatingType())
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000289 return Diag(OrigArg0->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000290 diag::err_typecheck_call_invalid_ordered_compare)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000291 << OrigArg0->getType() << OrigArg1->getType()
Chris Lattner8ba580c2008-11-19 05:08:23 +0000292 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000293
294 return false;
295}
296
Eli Friedman8c50c622008-05-20 08:23:37 +0000297bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
298 // The signature for these builtins is exact; the only thing we need
299 // to check is that the argument is a constant.
300 SourceLocation Loc;
Chris Lattner941c0102008-08-10 02:05:13 +0000301 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
Chris Lattner8ba580c2008-11-19 05:08:23 +0000302 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
Chris Lattner941c0102008-08-10 02:05:13 +0000303
Eli Friedman8c50c622008-05-20 08:23:37 +0000304 return false;
305}
306
Eli Friedmand0e9d092008-05-14 19:38:39 +0000307/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
308// This is declared to take (...), so we have to check everything.
Sebastian Redl8b769972009-01-19 00:08:26 +0000309Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
Eli Friedmand0e9d092008-05-14 19:38:39 +0000310 if (TheCall->getNumArgs() < 3)
Sebastian Redl8b769972009-01-19 00:08:26 +0000311 return ExprError(Diag(TheCall->getLocEnd(),
312 diag::err_typecheck_call_too_few_args)
313 << 0 /*function call*/ << TheCall->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000314
315 QualType FAType = TheCall->getArg(0)->getType();
316 QualType SAType = TheCall->getArg(1)->getType();
317
318 if (!FAType->isVectorType() || !SAType->isVectorType()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000319 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
320 << SourceRange(TheCall->getArg(0)->getLocStart(),
321 TheCall->getArg(1)->getLocEnd());
Sebastian Redl8b769972009-01-19 00:08:26 +0000322 return ExprError();
Eli Friedmand0e9d092008-05-14 19:38:39 +0000323 }
324
Chris Lattnerd5a56aa2008-07-26 22:17:49 +0000325 if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
326 Context.getCanonicalType(SAType).getUnqualifiedType()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000327 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
328 << SourceRange(TheCall->getArg(0)->getLocStart(),
329 TheCall->getArg(1)->getLocEnd());
Sebastian Redl8b769972009-01-19 00:08:26 +0000330 return ExprError();
Eli Friedmand0e9d092008-05-14 19:38:39 +0000331 }
332
333 unsigned numElements = FAType->getAsVectorType()->getNumElements();
334 if (TheCall->getNumArgs() != numElements+2) {
335 if (TheCall->getNumArgs() < numElements+2)
Sebastian Redl8b769972009-01-19 00:08:26 +0000336 return ExprError(Diag(TheCall->getLocEnd(),
337 diag::err_typecheck_call_too_few_args)
338 << 0 /*function call*/ << TheCall->getSourceRange());
339 return ExprError(Diag(TheCall->getLocEnd(),
340 diag::err_typecheck_call_too_many_args)
341 << 0 /*function call*/ << TheCall->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000342 }
343
344 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
345 llvm::APSInt Result(32);
Chris Lattner941c0102008-08-10 02:05:13 +0000346 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
Sebastian Redl8b769972009-01-19 00:08:26 +0000347 return ExprError(Diag(TheCall->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000348 diag::err_shufflevector_nonconstant_argument)
Sebastian Redl8b769972009-01-19 00:08:26 +0000349 << TheCall->getArg(i)->getSourceRange());
350
Chris Lattner941c0102008-08-10 02:05:13 +0000351 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
Sebastian Redl8b769972009-01-19 00:08:26 +0000352 return ExprError(Diag(TheCall->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000353 diag::err_shufflevector_argument_too_large)
Sebastian Redl8b769972009-01-19 00:08:26 +0000354 << TheCall->getArg(i)->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000355 }
356
357 llvm::SmallVector<Expr*, 32> exprs;
358
Chris Lattner941c0102008-08-10 02:05:13 +0000359 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
Eli Friedmand0e9d092008-05-14 19:38:39 +0000360 exprs.push_back(TheCall->getArg(i));
361 TheCall->setArg(i, 0);
362 }
363
Ted Kremenek0c97e042009-02-07 01:47:29 +0000364 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2,
365 FAType,
366 TheCall->getCallee()->getLocStart(),
367 TheCall->getRParenLoc()));
Eli Friedmand0e9d092008-05-14 19:38:39 +0000368}
Chris Lattnerf22a8502007-12-19 23:59:04 +0000369
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000370/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
371// This is declared to take (const void*, ...) and can take two
372// optional constant int args.
373bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000374 unsigned NumArgs = TheCall->getNumArgs();
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000375
Chris Lattner8ba580c2008-11-19 05:08:23 +0000376 if (NumArgs > 3)
377 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000378 << 0 /*function call*/ << TheCall->getSourceRange();
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000379
380 // Argument 0 is checked for us and the remaining arguments must be
381 // constant integers.
Chris Lattner8ba580c2008-11-19 05:08:23 +0000382 for (unsigned i = 1; i != NumArgs; ++i) {
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000383 Expr *Arg = TheCall->getArg(i);
384 QualType RWType = Arg->getType();
385
386 const BuiltinType *BT = RWType->getAsBuiltinType();
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000387 llvm::APSInt Result;
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000388 if (!BT || BT->getKind() != BuiltinType::Int ||
Chris Lattner8ba580c2008-11-19 05:08:23 +0000389 !Arg->isIntegerConstantExpr(Result, Context))
390 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
391 << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000392
393 // FIXME: gcc issues a warning and rewrites these to 0. These
394 // seems especially odd for the third argument since the default
395 // is 3.
Chris Lattner8ba580c2008-11-19 05:08:23 +0000396 if (i == 1) {
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000397 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
Chris Lattner8ba580c2008-11-19 05:08:23 +0000398 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
399 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000400 } else {
401 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
Chris Lattner8ba580c2008-11-19 05:08:23 +0000402 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
403 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000404 }
405 }
406
Chris Lattner8ba580c2008-11-19 05:08:23 +0000407 return false;
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000408}
409
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000410/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
411/// int type). This simply type checks that type is one of the defined
412/// constants (0-3).
413bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
414 Expr *Arg = TheCall->getArg(1);
415 QualType ArgType = Arg->getType();
416 const BuiltinType *BT = ArgType->getAsBuiltinType();
417 llvm::APSInt Result(32);
418 if (!BT || BT->getKind() != BuiltinType::Int ||
419 !Arg->isIntegerConstantExpr(Result, Context)) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000420 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
421 << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000422 }
423
424 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000425 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
426 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000427 }
428
429 return false;
430}
431
Ted Kremenek8c797c02009-01-12 23:09:09 +0000432// Handle i > 1 ? "x" : "y", recursivelly
433bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000434 unsigned format_idx, unsigned firstDataArg) {
Ted Kremenek8c797c02009-01-12 23:09:09 +0000435
436 switch (E->getStmtClass()) {
437 case Stmt::ConditionalOperatorClass: {
438 ConditionalOperator *C = cast<ConditionalOperator>(E);
439 return SemaCheckStringLiteral(C->getLHS(), TheCall,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000440 HasVAListArg, format_idx, firstDataArg)
Ted Kremenek8c797c02009-01-12 23:09:09 +0000441 && SemaCheckStringLiteral(C->getRHS(), TheCall,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000442 HasVAListArg, format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000443 }
444
445 case Stmt::ImplicitCastExprClass: {
446 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E);
447 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000448 format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000449 }
450
451 case Stmt::ParenExprClass: {
452 ParenExpr *Expr = dyn_cast<ParenExpr>(E);
453 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000454 format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000455 }
456
457 default: {
458 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E);
459 StringLiteral *StrE = NULL;
460
461 if (ObjCFExpr)
462 StrE = ObjCFExpr->getString();
463 else
464 StrE = dyn_cast<StringLiteral>(E);
465
466 if (StrE) {
Douglas Gregorb5af7382009-02-14 18:57:46 +0000467 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
468 firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000469 return true;
470 }
471
472 return false;
473 }
474 }
475}
476
477
Chris Lattner2e64c072007-08-10 20:18:51 +0000478/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek081ed872007-08-14 17:39:48 +0000479/// correct use of format strings.
480///
481/// HasVAListArg - A predicate indicating whether the printf-like
482/// function is passed an explicit va_arg argument (e.g., vprintf)
483///
484/// format_idx - The index into Args for the format string.
485///
486/// Improper format strings to functions in the printf family can be
487/// the source of bizarre bugs and very serious security holes. A
488/// good source of information is available in the following paper
489/// (which includes additional references):
Chris Lattner2e64c072007-08-10 20:18:51 +0000490///
491/// FormatGuard: Automatic Protection From printf Format String
492/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek081ed872007-08-14 17:39:48 +0000493///
494/// Functionality implemented:
495///
496/// We can statically check the following properties for string
497/// literal format strings for non v.*printf functions (where the
498/// arguments are passed directly):
499//
500/// (1) Are the number of format conversions equal to the number of
501/// data arguments?
502///
503/// (2) Does each format conversion correctly match the type of the
504/// corresponding data argument? (TODO)
505///
506/// Moreover, for all printf functions we can:
507///
508/// (3) Check for a missing format string (when not caught by type checking).
509///
510/// (4) Check for no-operation flags; e.g. using "#" with format
511/// conversion 'c' (TODO)
512///
513/// (5) Check the use of '%n', a major source of security holes.
514///
515/// (6) Check for malformed format conversions that don't specify anything.
516///
517/// (7) Check for empty format strings. e.g: printf("");
518///
519/// (8) Check that the format string is a wide literal.
520///
Ted Kremenekc2804c22008-03-03 16:50:00 +0000521/// (9) Also check the arguments of functions with the __format__ attribute.
522/// (TODO).
523///
Ted Kremenek081ed872007-08-14 17:39:48 +0000524/// All of these checks can be done by parsing the format string.
525///
526/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner2e64c072007-08-10 20:18:51 +0000527void
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000528Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000529 unsigned format_idx, unsigned firstDataArg) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000530 Expr *Fn = TheCall->getCallee();
531
Ted Kremenek081ed872007-08-14 17:39:48 +0000532 // CHECK: printf-like function is called with no format string.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000533 if (format_idx >= TheCall->getNumArgs()) {
Chris Lattner9d2cf082008-11-19 05:27:50 +0000534 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
535 << Fn->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000536 return;
537 }
538
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000539 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
Chris Lattnere65acc12007-08-25 05:36:18 +0000540
Chris Lattner2e64c072007-08-10 20:18:51 +0000541 // CHECK: format string is not a string literal.
542 //
Ted Kremenek081ed872007-08-14 17:39:48 +0000543 // Dynamically generated format strings are difficult to
544 // automatically vet at compile time. Requiring that format strings
545 // are string literals: (1) permits the checking of format strings by
546 // the compiler and thereby (2) can practically remove the source of
547 // many format string exploits.
Ted Kremenek225a14c2008-06-16 18:00:42 +0000548
549 // Format string can be either ObjC string (e.g. @"%d") or
550 // C string (e.g. "%d")
551 // ObjC string uses the same format specifiers as C string, so we can use
552 // the same format string checking logic for both ObjC and C strings.
Douglas Gregorb5af7382009-02-14 18:57:46 +0000553 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall,
554 HasVAListArg, format_idx,
555 firstDataArg);
Ted Kremenek225a14c2008-06-16 18:00:42 +0000556
Ted Kremenek8c797c02009-01-12 23:09:09 +0000557 if (!isFExpr) {
Ted Kremenek19398b62007-12-17 19:03:13 +0000558 // For vprintf* functions (i.e., HasVAListArg==true), we add a
559 // special check to see if the format string is a function parameter
560 // of the function calling the printf function. If the function
561 // has an attribute indicating it is a printf-like function, then we
562 // should suppress warnings concerning non-literals being used in a call
563 // to a vprintf function. For example:
564 //
565 // void
566 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
567 // va_list ap;
568 // va_start(ap, fmt);
569 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
570 // ...
571 //
572 //
573 // FIXME: We don't have full attribute support yet, so just check to see
574 // if the argument is a DeclRefExpr that references a parameter. We'll
575 // add proper support for checking the attribute later.
576 if (HasVAListArg)
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000577 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
578 if (isa<ParmVarDecl>(DR->getDecl()))
Ted Kremenek19398b62007-12-17 19:03:13 +0000579 return;
Ted Kremenek8c797c02009-01-12 23:09:09 +0000580
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000581 Diag(TheCall->getArg(format_idx)->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000582 diag::warn_printf_not_string_constant)
583 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000584 return;
585 }
Ted Kremenek8c797c02009-01-12 23:09:09 +0000586}
Ted Kremenek081ed872007-08-14 17:39:48 +0000587
Ted Kremenek8c797c02009-01-12 23:09:09 +0000588void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000589 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx,
590 unsigned firstDataArg) {
Ted Kremenek8c797c02009-01-12 23:09:09 +0000591
592 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
Ted Kremenek081ed872007-08-14 17:39:48 +0000593 // CHECK: is the format string a wide literal?
594 if (FExpr->isWide()) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000595 Diag(FExpr->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000596 diag::warn_printf_format_string_is_wide_literal)
597 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000598 return;
599 }
600
601 // Str - The format string. NOTE: this is NOT null-terminated!
602 const char * const Str = FExpr->getStrData();
603
604 // CHECK: empty format string?
605 const unsigned StrLen = FExpr->getByteLength();
606
607 if (StrLen == 0) {
Chris Lattner9d2cf082008-11-19 05:27:50 +0000608 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
609 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000610 return;
611 }
612
613 // We process the format string using a binary state machine. The
614 // current state is stored in CurrentState.
615 enum {
616 state_OrdChr,
617 state_Conversion
618 } CurrentState = state_OrdChr;
619
620 // numConversions - The number of conversions seen so far. This is
621 // incremented as we traverse the format string.
622 unsigned numConversions = 0;
623
624 // numDataArgs - The number of data arguments after the format
625 // string. This can only be determined for non vprintf-like
626 // functions. For those functions, this value is 1 (the sole
627 // va_arg argument).
Douglas Gregorb5af7382009-02-14 18:57:46 +0000628 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
Ted Kremenek081ed872007-08-14 17:39:48 +0000629
630 // Inspect the format string.
631 unsigned StrIdx = 0;
632
633 // LastConversionIdx - Index within the format string where we last saw
634 // a '%' character that starts a new format conversion.
635 unsigned LastConversionIdx = 0;
636
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000637 for (; StrIdx < StrLen; ++StrIdx) {
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000638
Ted Kremenek081ed872007-08-14 17:39:48 +0000639 // Is the number of detected conversion conversions greater than
640 // the number of matching data arguments? If so, stop.
641 if (!HasVAListArg && numConversions > numDataArgs) break;
642
643 // Handle "\0"
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000644 if (Str[StrIdx] == '\0') {
Ted Kremenek081ed872007-08-14 17:39:48 +0000645 // The string returned by getStrData() is not null-terminated,
646 // so the presence of a null character is likely an error.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000647 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000648 diag::warn_printf_format_string_contains_null_char)
649 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000650 return;
651 }
652
653 // Ordinary characters (not processing a format conversion).
654 if (CurrentState == state_OrdChr) {
655 if (Str[StrIdx] == '%') {
656 CurrentState = state_Conversion;
657 LastConversionIdx = StrIdx;
658 }
659 continue;
660 }
661
662 // Seen '%'. Now processing a format conversion.
663 switch (Str[StrIdx]) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000664 // Handle dynamic precision or width specifier.
665 case '*': {
666 ++numConversions;
667
668 if (!HasVAListArg && numConversions > numDataArgs) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000669 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
Ted Kremenek035d8792007-10-12 20:51:52 +0000670
Ted Kremenek035d8792007-10-12 20:51:52 +0000671 if (Str[StrIdx-1] == '.')
Chris Lattner9d2cf082008-11-19 05:27:50 +0000672 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
673 << OrigFormatExpr->getSourceRange();
Ted Kremenek035d8792007-10-12 20:51:52 +0000674 else
Chris Lattner9d2cf082008-11-19 05:27:50 +0000675 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
676 << OrigFormatExpr->getSourceRange();
Ted Kremenek035d8792007-10-12 20:51:52 +0000677
Chris Lattner68d88f02007-12-28 05:31:15 +0000678 // Don't do any more checking. We'll just emit spurious errors.
679 return;
Ted Kremenek035d8792007-10-12 20:51:52 +0000680 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000681
682 // Perform type checking on width/precision specifier.
683 Expr *E = TheCall->getArg(format_idx+numConversions);
684 if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
685 if (BT->getKind() == BuiltinType::Int)
686 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000687
Chris Lattnerf17cb362009-02-18 17:49:48 +0000688 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000689
690 if (Str[StrIdx-1] == '.')
Chris Lattner9d2cf082008-11-19 05:27:50 +0000691 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000692 << E->getType() << E->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000693 else
Chris Lattner9d2cf082008-11-19 05:27:50 +0000694 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000695 << E->getType() << E->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000696
697 break;
698 }
699
700 // Characters which can terminate a format conversion
701 // (e.g. "%d"). Characters that specify length modifiers or
702 // other flags are handled by the default case below.
703 //
704 // FIXME: additional checks will go into the following cases.
705 case 'i':
706 case 'd':
707 case 'o':
708 case 'u':
709 case 'x':
710 case 'X':
711 case 'D':
712 case 'O':
713 case 'U':
714 case 'e':
715 case 'E':
716 case 'f':
717 case 'F':
718 case 'g':
719 case 'G':
720 case 'a':
721 case 'A':
722 case 'c':
723 case 'C':
724 case 'S':
725 case 's':
726 case 'p':
727 ++numConversions;
728 CurrentState = state_OrdChr;
729 break;
730
731 // CHECK: Are we using "%n"? Issue a warning.
732 case 'n': {
733 ++numConversions;
734 CurrentState = state_OrdChr;
Chris Lattnerf17cb362009-02-18 17:49:48 +0000735 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
736 LastConversionIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000737
Chris Lattner9d2cf082008-11-19 05:27:50 +0000738 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000739 break;
740 }
Ted Kremenek225a14c2008-06-16 18:00:42 +0000741
742 // Handle "%@"
743 case '@':
744 // %@ is allowed in ObjC format strings only.
745 if(ObjCFExpr != NULL)
746 CurrentState = state_OrdChr;
747 else {
748 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000749 SourceLocation Loc =
750 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek225a14c2008-06-16 18:00:42 +0000751
Chris Lattner77d52da2008-11-20 06:06:08 +0000752 Diag(Loc, diag::warn_printf_invalid_conversion)
753 << std::string(Str+LastConversionIdx,
754 Str+std::min(LastConversionIdx+2, StrLen))
755 << OrigFormatExpr->getSourceRange();
Ted Kremenek225a14c2008-06-16 18:00:42 +0000756 }
757 ++numConversions;
758 break;
759
Chris Lattner68d88f02007-12-28 05:31:15 +0000760 // Handle "%%"
761 case '%':
762 // Sanity check: Was the first "%" character the previous one?
763 // If not, we will assume that we have a malformed format
764 // conversion, and that the current "%" character is the start
765 // of a new conversion.
766 if (StrIdx - LastConversionIdx == 1)
767 CurrentState = state_OrdChr;
768 else {
769 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000770 SourceLocation Loc =
771 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000772
Chris Lattner77d52da2008-11-20 06:06:08 +0000773 Diag(Loc, diag::warn_printf_invalid_conversion)
774 << std::string(Str+LastConversionIdx, Str+StrIdx)
775 << OrigFormatExpr->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000776
777 // This conversion is broken. Advance to the next format
778 // conversion.
779 LastConversionIdx = StrIdx;
780 ++numConversions;
Ted Kremenek081ed872007-08-14 17:39:48 +0000781 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000782 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000783
Chris Lattner68d88f02007-12-28 05:31:15 +0000784 default:
785 // This case catches all other characters: flags, widths, etc.
786 // We should eventually process those as well.
787 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000788 }
789 }
790
791 if (CurrentState == state_Conversion) {
792 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000793 SourceLocation Loc =
794 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek081ed872007-08-14 17:39:48 +0000795
Chris Lattner77d52da2008-11-20 06:06:08 +0000796 Diag(Loc, diag::warn_printf_invalid_conversion)
797 << std::string(Str+LastConversionIdx,
798 Str+std::min(LastConversionIdx+2, StrLen))
799 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000800 return;
801 }
802
803 if (!HasVAListArg) {
804 // CHECK: Does the number of format conversions exceed the number
805 // of data arguments?
806 if (numConversions > numDataArgs) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000807 SourceLocation Loc =
808 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek081ed872007-08-14 17:39:48 +0000809
Chris Lattner9d2cf082008-11-19 05:27:50 +0000810 Diag(Loc, diag::warn_printf_insufficient_data_args)
811 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000812 }
813 // CHECK: Does the number of data arguments exceed the number of
814 // format conversions in the format string?
815 else if (numConversions < numDataArgs)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000816 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000817 diag::warn_printf_too_many_data_args)
818 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000819 }
820}
Ted Kremenek45925ab2007-08-17 16:46:58 +0000821
822//===--- CHECK: Return Address of Stack Variable --------------------------===//
823
824static DeclRefExpr* EvalVal(Expr *E);
825static DeclRefExpr* EvalAddr(Expr* E);
826
827/// CheckReturnStackAddr - Check if a return statement returns the address
828/// of a stack variable.
829void
830Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
831 SourceLocation ReturnLoc) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000832
Ted Kremenek45925ab2007-08-17 16:46:58 +0000833 // Perform checking for returned stack addresses.
Steve Naroffd6163f32008-09-05 22:11:13 +0000834 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000835 if (DeclRefExpr *DR = EvalAddr(RetValExp))
Chris Lattner65cae292008-11-19 08:23:25 +0000836 Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
Chris Lattnerb1753422008-11-23 21:45:46 +0000837 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
Steve Naroff503996b2008-09-16 22:25:10 +0000838
839 // Skip over implicit cast expressions when checking for block expressions.
840 if (ImplicitCastExpr *IcExpr =
841 dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
842 RetValExp = IcExpr->getSubExpr();
843
Steve Naroff3eac7692008-09-10 19:17:48 +0000844 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
Chris Lattner9d2cf082008-11-19 05:27:50 +0000845 Diag(C->getLocStart(), diag::err_ret_local_block)
846 << C->getSourceRange();
Ted Kremenek45925ab2007-08-17 16:46:58 +0000847 }
848 // Perform checking for stack values returned by reference.
849 else if (lhsType->isReferenceType()) {
Douglas Gregor21a04f32008-10-27 19:41:14 +0000850 // Check for a reference to the stack
851 if (DeclRefExpr *DR = EvalVal(RetValExp))
Chris Lattner9d2cf082008-11-19 05:27:50 +0000852 Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
Chris Lattnerb1753422008-11-23 21:45:46 +0000853 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
Ted Kremenek45925ab2007-08-17 16:46:58 +0000854 }
855}
856
857/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
858/// check if the expression in a return statement evaluates to an address
859/// to a location on the stack. The recursion is used to traverse the
860/// AST of the return expression, with recursion backtracking when we
861/// encounter a subexpression that (1) clearly does not lead to the address
862/// of a stack variable or (2) is something we cannot determine leads to
863/// the address of a stack variable based on such local checking.
864///
Ted Kremenekda1300a2007-08-28 17:02:55 +0000865/// EvalAddr processes expressions that are pointers that are used as
866/// references (and not L-values). EvalVal handles all other values.
Ted Kremenek45925ab2007-08-17 16:46:58 +0000867/// At the base case of the recursion is a check for a DeclRefExpr* in
868/// the refers to a stack variable.
869///
870/// This implementation handles:
871///
872/// * pointer-to-pointer casts
873/// * implicit conversions from array references to pointers
874/// * taking the address of fields
875/// * arbitrary interplay between "&" and "*" operators
876/// * pointer arithmetic from an address of a stack variable
877/// * taking the address of an array element where the array is on the stack
878static DeclRefExpr* EvalAddr(Expr *E) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000879 // We should only be called for evaluating pointer expressions.
Steve Naroffd6163f32008-09-05 22:11:13 +0000880 assert((E->getType()->isPointerType() ||
881 E->getType()->isBlockPointerType() ||
Ted Kremenek42730c52008-01-07 19:49:32 +0000882 E->getType()->isObjCQualifiedIdType()) &&
Chris Lattner68d88f02007-12-28 05:31:15 +0000883 "EvalAddr only works on pointers");
Ted Kremenek45925ab2007-08-17 16:46:58 +0000884
885 // Our "symbolic interpreter" is just a dispatch off the currently
886 // viewed AST node. We then recursively traverse the AST by calling
887 // EvalAddr and EvalVal appropriately.
888 switch (E->getStmtClass()) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000889 case Stmt::ParenExprClass:
890 // Ignore parentheses.
891 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000892
Chris Lattner68d88f02007-12-28 05:31:15 +0000893 case Stmt::UnaryOperatorClass: {
894 // The only unary operator that make sense to handle here
895 // is AddrOf. All others don't make sense as pointers.
896 UnaryOperator *U = cast<UnaryOperator>(E);
Ted Kremenek45925ab2007-08-17 16:46:58 +0000897
Chris Lattner68d88f02007-12-28 05:31:15 +0000898 if (U->getOpcode() == UnaryOperator::AddrOf)
899 return EvalVal(U->getSubExpr());
900 else
Ted Kremenek45925ab2007-08-17 16:46:58 +0000901 return NULL;
902 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000903
904 case Stmt::BinaryOperatorClass: {
905 // Handle pointer arithmetic. All other binary operators are not valid
906 // in this context.
907 BinaryOperator *B = cast<BinaryOperator>(E);
908 BinaryOperator::Opcode op = B->getOpcode();
909
910 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
911 return NULL;
912
913 Expr *Base = B->getLHS();
914
915 // Determine which argument is the real pointer base. It could be
916 // the RHS argument instead of the LHS.
917 if (!Base->getType()->isPointerType()) Base = B->getRHS();
918
919 assert (Base->getType()->isPointerType());
920 return EvalAddr(Base);
921 }
Steve Naroff3eac7692008-09-10 19:17:48 +0000922
Chris Lattner68d88f02007-12-28 05:31:15 +0000923 // For conditional operators we need to see if either the LHS or RHS are
924 // valid DeclRefExpr*s. If one of them is valid, we return it.
925 case Stmt::ConditionalOperatorClass: {
926 ConditionalOperator *C = cast<ConditionalOperator>(E);
927
928 // Handle the GNU extension for missing LHS.
929 if (Expr *lhsExpr = C->getLHS())
930 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
931 return LHS;
932
933 return EvalAddr(C->getRHS());
934 }
935
Ted Kremenekea19edd2008-08-07 00:49:01 +0000936 // For casts, we need to handle conversions from arrays to
937 // pointer values, and pointer-to-pointer conversions.
Douglas Gregor21a04f32008-10-27 19:41:14 +0000938 case Stmt::ImplicitCastExprClass:
Douglas Gregor035d0882008-10-28 15:36:24 +0000939 case Stmt::CStyleCastExprClass:
Douglas Gregor21a04f32008-10-27 19:41:14 +0000940 case Stmt::CXXFunctionalCastExprClass: {
Argiris Kirtzidisc45e2fb2008-08-18 23:01:59 +0000941 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
Ted Kremenekea19edd2008-08-07 00:49:01 +0000942 QualType T = SubExpr->getType();
943
Steve Naroffd6163f32008-09-05 22:11:13 +0000944 if (SubExpr->getType()->isPointerType() ||
945 SubExpr->getType()->isBlockPointerType() ||
946 SubExpr->getType()->isObjCQualifiedIdType())
Ted Kremenekea19edd2008-08-07 00:49:01 +0000947 return EvalAddr(SubExpr);
948 else if (T->isArrayType())
Chris Lattner68d88f02007-12-28 05:31:15 +0000949 return EvalVal(SubExpr);
Chris Lattner68d88f02007-12-28 05:31:15 +0000950 else
Ted Kremenekea19edd2008-08-07 00:49:01 +0000951 return 0;
Chris Lattner68d88f02007-12-28 05:31:15 +0000952 }
953
954 // C++ casts. For dynamic casts, static casts, and const casts, we
955 // are always converting from a pointer-to-pointer, so we just blow
Douglas Gregor21a04f32008-10-27 19:41:14 +0000956 // through the cast. In the case the dynamic cast doesn't fail (and
957 // return NULL), we take the conservative route and report cases
Chris Lattner68d88f02007-12-28 05:31:15 +0000958 // where we return the address of a stack variable. For Reinterpre
Douglas Gregor21a04f32008-10-27 19:41:14 +0000959 // FIXME: The comment about is wrong; we're not always converting
960 // from pointer to pointer. I'm guessing that this code should also
961 // handle references to objects.
962 case Stmt::CXXStaticCastExprClass:
963 case Stmt::CXXDynamicCastExprClass:
964 case Stmt::CXXConstCastExprClass:
965 case Stmt::CXXReinterpretCastExprClass: {
966 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
Steve Naroffd6163f32008-09-05 22:11:13 +0000967 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
Chris Lattner68d88f02007-12-28 05:31:15 +0000968 return EvalAddr(S);
969 else
970 return NULL;
Chris Lattner68d88f02007-12-28 05:31:15 +0000971 }
972
973 // Everything else: we simply don't reason about them.
974 default:
975 return NULL;
976 }
Ted Kremenek45925ab2007-08-17 16:46:58 +0000977}
978
979
980/// EvalVal - This function is complements EvalAddr in the mutual recursion.
981/// See the comments for EvalAddr for more details.
982static DeclRefExpr* EvalVal(Expr *E) {
983
Ted Kremenekda1300a2007-08-28 17:02:55 +0000984 // We should only be called for evaluating non-pointer expressions, or
985 // expressions with a pointer type that are not used as references but instead
986 // are l-values (e.g., DeclRefExpr with a pointer type).
987
Ted Kremenek45925ab2007-08-17 16:46:58 +0000988 // Our "symbolic interpreter" is just a dispatch off the currently
989 // viewed AST node. We then recursively traverse the AST by calling
990 // EvalAddr and EvalVal appropriately.
991 switch (E->getStmtClass()) {
Douglas Gregor566782a2009-01-06 05:10:23 +0000992 case Stmt::DeclRefExprClass:
993 case Stmt::QualifiedDeclRefExprClass: {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000994 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
995 // at code that refers to a variable's name. We check if it has local
996 // storage within the function, and if so, return the expression.
997 DeclRefExpr *DR = cast<DeclRefExpr>(E);
998
999 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
Douglas Gregor81c29152008-10-29 00:13:59 +00001000 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
Ted Kremenek45925ab2007-08-17 16:46:58 +00001001
1002 return NULL;
1003 }
1004
1005 case Stmt::ParenExprClass:
1006 // Ignore parentheses.
1007 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
1008
1009 case Stmt::UnaryOperatorClass: {
1010 // The only unary operator that make sense to handle here
1011 // is Deref. All others don't resolve to a "name." This includes
1012 // handling all sorts of rvalues passed to a unary operator.
1013 UnaryOperator *U = cast<UnaryOperator>(E);
1014
1015 if (U->getOpcode() == UnaryOperator::Deref)
1016 return EvalAddr(U->getSubExpr());
1017
1018 return NULL;
1019 }
1020
1021 case Stmt::ArraySubscriptExprClass: {
1022 // Array subscripts are potential references to data on the stack. We
1023 // retrieve the DeclRefExpr* for the array variable if it indeed
1024 // has local storage.
Ted Kremenek1c1700f2007-08-20 16:18:38 +00001025 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek45925ab2007-08-17 16:46:58 +00001026 }
1027
1028 case Stmt::ConditionalOperatorClass: {
1029 // For conditional operators we need to see if either the LHS or RHS are
1030 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
1031 ConditionalOperator *C = cast<ConditionalOperator>(E);
1032
Anders Carlsson37365fc2007-11-30 19:04:31 +00001033 // Handle the GNU extension for missing LHS.
1034 if (Expr *lhsExpr = C->getLHS())
1035 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1036 return LHS;
1037
1038 return EvalVal(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +00001039 }
1040
1041 // Accesses to members are potential references to data on the stack.
1042 case Stmt::MemberExprClass: {
1043 MemberExpr *M = cast<MemberExpr>(E);
1044
1045 // Check for indirect access. We only want direct field accesses.
1046 if (!M->isArrow())
1047 return EvalVal(M->getBase());
1048 else
1049 return NULL;
1050 }
1051
1052 // Everything else: we simply don't reason about them.
1053 default:
1054 return NULL;
1055 }
1056}
Ted Kremenek30c66752007-11-25 00:58:00 +00001057
1058//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1059
1060/// Check for comparisons of floating point operands using != and ==.
1061/// Issue a warning if these are no self-comparisons, as they are not likely
1062/// to do what the programmer intended.
1063void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1064 bool EmitWarning = true;
1065
Ted Kremenek87e30c52008-01-17 16:57:34 +00001066 Expr* LeftExprSansParen = lex->IgnoreParens();
Ted Kremenek24c61682008-01-17 17:55:13 +00001067 Expr* RightExprSansParen = rex->IgnoreParens();
Ted Kremenek30c66752007-11-25 00:58:00 +00001068
1069 // Special case: check for x == x (which is OK).
1070 // Do not emit warnings for such cases.
1071 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1072 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1073 if (DRL->getDecl() == DRR->getDecl())
1074 EmitWarning = false;
1075
Ted Kremenek33159832007-11-29 00:59:04 +00001076
1077 // Special case: check for comparisons against literals that can be exactly
1078 // represented by APFloat. In such cases, do not emit a warning. This
1079 // is a heuristic: often comparison against such literals are used to
1080 // detect if a value in a variable has not changed. This clearly can
1081 // lead to false negatives.
1082 if (EmitWarning) {
1083 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1084 if (FLL->isExact())
1085 EmitWarning = false;
1086 }
1087 else
1088 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1089 if (FLR->isExact())
1090 EmitWarning = false;
1091 }
1092 }
1093
Ted Kremenek30c66752007-11-25 00:58:00 +00001094 // Check for comparisons with builtin types.
Sebastian Redl8b769972009-01-19 00:08:26 +00001095 if (EmitWarning)
Ted Kremenek30c66752007-11-25 00:58:00 +00001096 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
Douglas Gregorb5af7382009-02-14 18:57:46 +00001097 if (CL->isBuiltinCall(Context))
Ted Kremenek30c66752007-11-25 00:58:00 +00001098 EmitWarning = false;
1099
Sebastian Redl8b769972009-01-19 00:08:26 +00001100 if (EmitWarning)
Ted Kremenek30c66752007-11-25 00:58:00 +00001101 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
Douglas Gregorb5af7382009-02-14 18:57:46 +00001102 if (CR->isBuiltinCall(Context))
Ted Kremenek30c66752007-11-25 00:58:00 +00001103 EmitWarning = false;
1104
1105 // Emit the diagnostic.
1106 if (EmitWarning)
Chris Lattner8ba580c2008-11-19 05:08:23 +00001107 Diag(loc, diag::warn_floatingpoint_eq)
1108 << lex->getSourceRange() << rex->getSourceRange();
Ted Kremenek30c66752007-11-25 00:58:00 +00001109}