blob: 2f837822f86e476c3cb04ba551b502de6348a6b8 [file] [log] [blame]
Chris Lattner2e64c072007-08-10 20:18:51 +00001//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner959e5be2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner2e64c072007-08-10 20:18:51 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This file implements extra semantic analysis beyond what is enforced
11// by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
Daniel Dunbar64789f82008-08-11 05:35:13 +000017#include "clang/AST/DeclObjC.h"
Ted Kremenek1c1700f2007-08-20 16:18:38 +000018#include "clang/AST/ExprCXX.h"
Ted Kremenek225a14c2008-06-16 18:00:42 +000019#include "clang/AST/ExprObjC.h"
Chris Lattnerbe93e792009-02-18 19:21:10 +000020#include "clang/Lex/LiteralSupport.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000021#include "clang/Lex/Preprocessor.h"
Chris Lattner2e64c072007-08-10 20:18:51 +000022using namespace clang;
23
Chris Lattnerf17cb362009-02-18 17:49:48 +000024/// getLocationOfStringLiteralByte - Return a source location that points to the
25/// specified byte of the specified string literal.
26///
27/// Strings are amazingly complex. They can be formed from multiple tokens and
28/// can have escape sequences in them in addition to the usual trigraph and
29/// escaped newline business. This routine handles this complexity.
30///
31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
32 unsigned ByteNo) const {
33 assert(!SL->isWide() && "This doesn't work for wide strings yet");
34
35 // Loop over all of the tokens in this string until we find the one that
36 // contains the byte we're looking for.
37 unsigned TokNo = 0;
38 while (1) {
39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
41
42 // Get the spelling of the string so that we can get the data that makes up
43 // the string literal, not the identifier for the macro it is potentially
44 // expanded through.
45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
46
47 // Re-lex the token to get its length and original spelling.
48 std::pair<FileID, unsigned> LocInfo =
49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
50 std::pair<const char *,const char *> Buffer =
51 SourceMgr.getBufferData(LocInfo.first);
52 const char *StrData = Buffer.first+LocInfo.second;
53
54 // Create a langops struct and enable trigraphs. This is sufficient for
55 // relexing tokens.
56 LangOptions LangOpts;
57 LangOpts.Trigraphs = true;
58
59 // Create a lexer starting at the beginning of this token.
60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
61 Buffer.second);
62 Token TheTok;
63 TheLexer.LexFromRawLexer(TheTok);
64
Chris Lattnerf6d44722009-02-18 19:26:42 +000065 // Use the StringLiteralParser to compute the length of the string in bytes.
66 StringLiteralParser SLP(&TheTok, 1, PP);
67 unsigned TokNumBytes = SLP.GetStringLength();
Chris Lattner30183b02009-02-18 18:34:12 +000068
Chris Lattner81df8462009-02-18 18:52:52 +000069 // If the byte is in this token, return the location of the byte.
Chris Lattnerf17cb362009-02-18 17:49:48 +000070 if (ByteNo < TokNumBytes ||
71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
Chris Lattnerbe93e792009-02-18 19:21:10 +000072 unsigned Offset =
73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
74
75 // Now that we know the offset of the token in the spelling, use the
76 // preprocessor to get the offset in the original source.
77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
Chris Lattnerf17cb362009-02-18 17:49:48 +000078 }
79
80 // Move to the next string token.
81 ++TokNo;
82 ByteNo -= TokNumBytes;
83 }
84}
85
86
Chris Lattner2e64c072007-08-10 20:18:51 +000087/// CheckFunctionCall - Check a direct function call for various correctness
88/// and safety properties not strictly enforced by the C type system.
Sebastian Redl8b769972009-01-19 00:08:26 +000089Action::OwningExprResult
90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
91 OwningExprResult TheCallResult(Owned(TheCall));
Chris Lattner2e64c072007-08-10 20:18:51 +000092 // Get the IdentifierInfo* for the called function.
93 IdentifierInfo *FnInfo = FDecl->getIdentifier();
Douglas Gregorb0212bd2008-11-17 20:34:05 +000094
95 // None of the checks below are needed for functions that don't have
96 // simple names (e.g., C++ conversion functions).
97 if (!FnInfo)
Sebastian Redl8b769972009-01-19 00:08:26 +000098 return move(TheCallResult);
Douglas Gregorb0212bd2008-11-17 20:34:05 +000099
Douglas Gregorb5af7382009-02-14 18:57:46 +0000100 switch (FDecl->getBuiltinID(Context)) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000101 case Builtin::BI__builtin___CFStringMakeConstantString:
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000102 assert(TheCall->getNumArgs() == 1 &&
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000103 "Wrong # arguments to builtin CFStringMakeConstantString");
Chris Lattner81f5be22009-02-18 06:01:06 +0000104 if (CheckObjCString(TheCall->getArg(0)))
Sebastian Redl8b769972009-01-19 00:08:26 +0000105 return ExprError();
106 return move(TheCallResult);
Ted Kremenek7a0654c2008-07-09 17:58:53 +0000107 case Builtin::BI__builtin_stdarg_start:
Chris Lattnerf22a8502007-12-19 23:59:04 +0000108 case Builtin::BI__builtin_va_start:
Sebastian Redl8b769972009-01-19 00:08:26 +0000109 if (SemaBuiltinVAStart(TheCall))
110 return ExprError();
111 return move(TheCallResult);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000112 case Builtin::BI__builtin_isgreater:
113 case Builtin::BI__builtin_isgreaterequal:
114 case Builtin::BI__builtin_isless:
115 case Builtin::BI__builtin_islessequal:
116 case Builtin::BI__builtin_islessgreater:
117 case Builtin::BI__builtin_isunordered:
Sebastian Redl8b769972009-01-19 00:08:26 +0000118 if (SemaBuiltinUnorderedCompare(TheCall))
119 return ExprError();
120 return move(TheCallResult);
Eli Friedman8c50c622008-05-20 08:23:37 +0000121 case Builtin::BI__builtin_return_address:
122 case Builtin::BI__builtin_frame_address:
Sebastian Redl8b769972009-01-19 00:08:26 +0000123 if (SemaBuiltinStackAddress(TheCall))
124 return ExprError();
125 return move(TheCallResult);
Eli Friedmand0e9d092008-05-14 19:38:39 +0000126 case Builtin::BI__builtin_shufflevector:
Sebastian Redl8b769972009-01-19 00:08:26 +0000127 return SemaBuiltinShuffleVector(TheCall);
128 // TheCall will be freed by the smart pointer here, but that's fine, since
129 // SemaBuiltinShuffleVector guts it, but then doesn't release it.
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000130 case Builtin::BI__builtin_prefetch:
Sebastian Redl8b769972009-01-19 00:08:26 +0000131 if (SemaBuiltinPrefetch(TheCall))
132 return ExprError();
133 return move(TheCallResult);
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000134 case Builtin::BI__builtin_object_size:
Sebastian Redl8b769972009-01-19 00:08:26 +0000135 if (SemaBuiltinObjectSize(TheCall))
136 return ExprError();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000137 }
Daniel Dunbar0ab03e62008-10-02 18:44:07 +0000138
139 // FIXME: This mechanism should be abstracted to be less fragile and
140 // more efficient. For example, just map function ids to custom
141 // handlers.
142
Chris Lattner2e64c072007-08-10 20:18:51 +0000143 // Printf checking.
Douglas Gregorb5af7382009-02-14 18:57:46 +0000144 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
145 if (Format->getType() == "printf") {
146 bool HasVAListArg = false;
147 if (const FunctionTypeProto *Proto
148 = FDecl->getType()->getAsFunctionTypeProto())
149 HasVAListArg = !Proto->isVariadic();
150 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
151 Format->getFirstArg() - 1);
152 }
Chris Lattner2e64c072007-08-10 20:18:51 +0000153 }
Sebastian Redl8b769972009-01-19 00:08:26 +0000154
155 return move(TheCallResult);
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000156}
157
Chris Lattner81f5be22009-02-18 06:01:06 +0000158/// CheckObjCString - Checks that the argument to the builtin
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000159/// CFString constructor is correct
Chris Lattner81f5be22009-02-18 06:01:06 +0000160bool Sema::CheckObjCString(Expr *Arg) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000161 Arg = Arg->IgnoreParenCasts();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000162 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
163
164 if (!Literal || Literal->isWide()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000165 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
166 << Arg->getSourceRange();
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000167 return true;
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000168 }
169
170 const char *Data = Literal->getStrData();
171 unsigned Length = Literal->getByteLength();
172
173 for (unsigned i = 0; i < Length; ++i) {
174 if (!isascii(Data[i])) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000175 Diag(getLocationOfStringLiteralByte(Literal, i),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000176 diag::warn_cfstring_literal_contains_non_ascii_character)
177 << Arg->getSourceRange();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000178 break;
179 }
180
181 if (!Data[i]) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000182 Diag(getLocationOfStringLiteralByte(Literal, i),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000183 diag::warn_cfstring_literal_contains_nul_character)
184 << Arg->getSourceRange();
Anders Carlssone7e7aa22007-08-17 05:31:46 +0000185 break;
186 }
187 }
188
Anders Carlsson3e9b43b2007-08-17 15:44:17 +0000189 return false;
Chris Lattner2e64c072007-08-10 20:18:51 +0000190}
191
Chris Lattner3b933692007-12-20 00:05:45 +0000192/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
193/// Emit an error and return true on failure, return false on success.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000194bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
195 Expr *Fn = TheCall->getCallee();
196 if (TheCall->getNumArgs() > 2) {
Chris Lattner66beaba2008-11-21 18:44:24 +0000197 Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000198 diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000199 << 0 /*function call*/ << Fn->getSourceRange()
Chris Lattner8ba580c2008-11-19 05:08:23 +0000200 << SourceRange(TheCall->getArg(2)->getLocStart(),
201 (*(TheCall->arg_end()-1))->getLocEnd());
Chris Lattnerf22a8502007-12-19 23:59:04 +0000202 return true;
203 }
Eli Friedman6422de62008-12-15 22:05:35 +0000204
205 if (TheCall->getNumArgs() < 2) {
206 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
207 << 0 /*function call*/;
208 }
209
Chris Lattner3b933692007-12-20 00:05:45 +0000210 // Determine whether the current function is variadic or not.
211 bool isVariadic;
Eli Friedman6422de62008-12-15 22:05:35 +0000212 if (getCurFunctionDecl()) {
213 if (FunctionTypeProto* FTP =
214 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType()))
215 isVariadic = FTP->isVariadic();
216 else
217 isVariadic = false;
218 } else {
Argiris Kirtzidis95256e62008-06-28 06:07:14 +0000219 isVariadic = getCurMethodDecl()->isVariadic();
Eli Friedman6422de62008-12-15 22:05:35 +0000220 }
Chris Lattnerf22a8502007-12-19 23:59:04 +0000221
Chris Lattner3b933692007-12-20 00:05:45 +0000222 if (!isVariadic) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000223 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
224 return true;
225 }
226
227 // Verify that the second argument to the builtin is the last argument of the
228 // current function or method.
229 bool SecondArgIsLastNamedArgument = false;
Anders Carlsson924556e2008-02-13 01:22:59 +0000230 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
Anders Carlssonc27156b2008-02-11 04:20:54 +0000231
232 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
233 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
Chris Lattnerf22a8502007-12-19 23:59:04 +0000234 // FIXME: This isn't correct for methods (results in bogus warning).
235 // Get the last formal in the current function.
Anders Carlssonc27156b2008-02-11 04:20:54 +0000236 const ParmVarDecl *LastArg;
Chris Lattnere5cb5862008-12-04 23:50:19 +0000237 if (FunctionDecl *FD = getCurFunctionDecl())
238 LastArg = *(FD->param_end()-1);
Chris Lattnerf22a8502007-12-19 23:59:04 +0000239 else
Argiris Kirtzidis95256e62008-06-28 06:07:14 +0000240 LastArg = *(getCurMethodDecl()->param_end()-1);
Chris Lattnerf22a8502007-12-19 23:59:04 +0000241 SecondArgIsLastNamedArgument = PV == LastArg;
242 }
243 }
244
245 if (!SecondArgIsLastNamedArgument)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000246 Diag(TheCall->getArg(1)->getLocStart(),
Chris Lattnerf22a8502007-12-19 23:59:04 +0000247 diag::warn_second_parameter_of_va_start_not_last_named_argument);
248 return false;
Eli Friedman8c50c622008-05-20 08:23:37 +0000249}
Chris Lattnerf22a8502007-12-19 23:59:04 +0000250
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000251/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
252/// friends. This is declared to take (...), so we have to check everything.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000253bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
254 if (TheCall->getNumArgs() < 2)
Chris Lattner66beaba2008-11-21 18:44:24 +0000255 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
256 << 0 /*function call*/;
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000257 if (TheCall->getNumArgs() > 2)
258 return Diag(TheCall->getArg(2)->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000259 diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000260 << 0 /*function call*/
Chris Lattner8ba580c2008-11-19 05:08:23 +0000261 << SourceRange(TheCall->getArg(2)->getLocStart(),
262 (*(TheCall->arg_end()-1))->getLocEnd());
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000263
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000264 Expr *OrigArg0 = TheCall->getArg(0);
265 Expr *OrigArg1 = TheCall->getArg(1);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000266
267 // Do standard promotions between the two arguments, returning their common
268 // type.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000269 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000270
271 // If the common type isn't a real floating type, then the arguments were
272 // invalid for this operation.
273 if (!Res->isRealFloatingType())
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000274 return Diag(OrigArg0->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000275 diag::err_typecheck_call_invalid_ordered_compare)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000276 << OrigArg0->getType() << OrigArg1->getType()
Chris Lattner8ba580c2008-11-19 05:08:23 +0000277 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
Chris Lattner7c8d1af2007-12-20 00:26:33 +0000278
279 return false;
280}
281
Eli Friedman8c50c622008-05-20 08:23:37 +0000282bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
283 // The signature for these builtins is exact; the only thing we need
284 // to check is that the argument is a constant.
285 SourceLocation Loc;
Chris Lattner941c0102008-08-10 02:05:13 +0000286 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
Chris Lattner8ba580c2008-11-19 05:08:23 +0000287 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
Chris Lattner941c0102008-08-10 02:05:13 +0000288
Eli Friedman8c50c622008-05-20 08:23:37 +0000289 return false;
290}
291
Eli Friedmand0e9d092008-05-14 19:38:39 +0000292/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
293// This is declared to take (...), so we have to check everything.
Sebastian Redl8b769972009-01-19 00:08:26 +0000294Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
Eli Friedmand0e9d092008-05-14 19:38:39 +0000295 if (TheCall->getNumArgs() < 3)
Sebastian Redl8b769972009-01-19 00:08:26 +0000296 return ExprError(Diag(TheCall->getLocEnd(),
297 diag::err_typecheck_call_too_few_args)
298 << 0 /*function call*/ << TheCall->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000299
300 QualType FAType = TheCall->getArg(0)->getType();
301 QualType SAType = TheCall->getArg(1)->getType();
302
303 if (!FAType->isVectorType() || !SAType->isVectorType()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000304 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
305 << SourceRange(TheCall->getArg(0)->getLocStart(),
306 TheCall->getArg(1)->getLocEnd());
Sebastian Redl8b769972009-01-19 00:08:26 +0000307 return ExprError();
Eli Friedmand0e9d092008-05-14 19:38:39 +0000308 }
309
Chris Lattnerd5a56aa2008-07-26 22:17:49 +0000310 if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
311 Context.getCanonicalType(SAType).getUnqualifiedType()) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000312 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
313 << SourceRange(TheCall->getArg(0)->getLocStart(),
314 TheCall->getArg(1)->getLocEnd());
Sebastian Redl8b769972009-01-19 00:08:26 +0000315 return ExprError();
Eli Friedmand0e9d092008-05-14 19:38:39 +0000316 }
317
318 unsigned numElements = FAType->getAsVectorType()->getNumElements();
319 if (TheCall->getNumArgs() != numElements+2) {
320 if (TheCall->getNumArgs() < numElements+2)
Sebastian Redl8b769972009-01-19 00:08:26 +0000321 return ExprError(Diag(TheCall->getLocEnd(),
322 diag::err_typecheck_call_too_few_args)
323 << 0 /*function call*/ << TheCall->getSourceRange());
324 return ExprError(Diag(TheCall->getLocEnd(),
325 diag::err_typecheck_call_too_many_args)
326 << 0 /*function call*/ << TheCall->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000327 }
328
329 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
330 llvm::APSInt Result(32);
Chris Lattner941c0102008-08-10 02:05:13 +0000331 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
Sebastian Redl8b769972009-01-19 00:08:26 +0000332 return ExprError(Diag(TheCall->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000333 diag::err_shufflevector_nonconstant_argument)
Sebastian Redl8b769972009-01-19 00:08:26 +0000334 << TheCall->getArg(i)->getSourceRange());
335
Chris Lattner941c0102008-08-10 02:05:13 +0000336 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
Sebastian Redl8b769972009-01-19 00:08:26 +0000337 return ExprError(Diag(TheCall->getLocStart(),
Chris Lattner8ba580c2008-11-19 05:08:23 +0000338 diag::err_shufflevector_argument_too_large)
Sebastian Redl8b769972009-01-19 00:08:26 +0000339 << TheCall->getArg(i)->getSourceRange());
Eli Friedmand0e9d092008-05-14 19:38:39 +0000340 }
341
342 llvm::SmallVector<Expr*, 32> exprs;
343
Chris Lattner941c0102008-08-10 02:05:13 +0000344 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
Eli Friedmand0e9d092008-05-14 19:38:39 +0000345 exprs.push_back(TheCall->getArg(i));
346 TheCall->setArg(i, 0);
347 }
348
Ted Kremenek0c97e042009-02-07 01:47:29 +0000349 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2,
350 FAType,
351 TheCall->getCallee()->getLocStart(),
352 TheCall->getRParenLoc()));
Eli Friedmand0e9d092008-05-14 19:38:39 +0000353}
Chris Lattnerf22a8502007-12-19 23:59:04 +0000354
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000355/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
356// This is declared to take (const void*, ...) and can take two
357// optional constant int args.
358bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000359 unsigned NumArgs = TheCall->getNumArgs();
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000360
Chris Lattner8ba580c2008-11-19 05:08:23 +0000361 if (NumArgs > 3)
362 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
Chris Lattner66beaba2008-11-21 18:44:24 +0000363 << 0 /*function call*/ << TheCall->getSourceRange();
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000364
365 // Argument 0 is checked for us and the remaining arguments must be
366 // constant integers.
Chris Lattner8ba580c2008-11-19 05:08:23 +0000367 for (unsigned i = 1; i != NumArgs; ++i) {
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000368 Expr *Arg = TheCall->getArg(i);
369 QualType RWType = Arg->getType();
370
371 const BuiltinType *BT = RWType->getAsBuiltinType();
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000372 llvm::APSInt Result;
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000373 if (!BT || BT->getKind() != BuiltinType::Int ||
Chris Lattner8ba580c2008-11-19 05:08:23 +0000374 !Arg->isIntegerConstantExpr(Result, Context))
375 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
376 << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000377
378 // FIXME: gcc issues a warning and rewrites these to 0. These
379 // seems especially odd for the third argument since the default
380 // is 3.
Chris Lattner8ba580c2008-11-19 05:08:23 +0000381 if (i == 1) {
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000382 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
Chris Lattner8ba580c2008-11-19 05:08:23 +0000383 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
384 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000385 } else {
386 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
Chris Lattner8ba580c2008-11-19 05:08:23 +0000387 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
388 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000389 }
390 }
391
Chris Lattner8ba580c2008-11-19 05:08:23 +0000392 return false;
Daniel Dunbar5b0de852008-07-21 22:59:13 +0000393}
394
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000395/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
396/// int type). This simply type checks that type is one of the defined
397/// constants (0-3).
398bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
399 Expr *Arg = TheCall->getArg(1);
400 QualType ArgType = Arg->getType();
401 const BuiltinType *BT = ArgType->getAsBuiltinType();
402 llvm::APSInt Result(32);
403 if (!BT || BT->getKind() != BuiltinType::Int ||
404 !Arg->isIntegerConstantExpr(Result, Context)) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000405 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
406 << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000407 }
408
409 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
Chris Lattner8ba580c2008-11-19 05:08:23 +0000410 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
411 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
Daniel Dunbar30ad42d2008-09-03 21:13:56 +0000412 }
413
414 return false;
415}
416
Ted Kremenek8c797c02009-01-12 23:09:09 +0000417// Handle i > 1 ? "x" : "y", recursivelly
418bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000419 unsigned format_idx, unsigned firstDataArg) {
Ted Kremenek8c797c02009-01-12 23:09:09 +0000420
421 switch (E->getStmtClass()) {
422 case Stmt::ConditionalOperatorClass: {
423 ConditionalOperator *C = cast<ConditionalOperator>(E);
424 return SemaCheckStringLiteral(C->getLHS(), TheCall,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000425 HasVAListArg, format_idx, firstDataArg)
Ted Kremenek8c797c02009-01-12 23:09:09 +0000426 && SemaCheckStringLiteral(C->getRHS(), TheCall,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000427 HasVAListArg, format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000428 }
429
430 case Stmt::ImplicitCastExprClass: {
431 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E);
432 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000433 format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000434 }
435
436 case Stmt::ParenExprClass: {
437 ParenExpr *Expr = dyn_cast<ParenExpr>(E);
438 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000439 format_idx, firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000440 }
441
442 default: {
443 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E);
444 StringLiteral *StrE = NULL;
445
446 if (ObjCFExpr)
447 StrE = ObjCFExpr->getString();
448 else
449 StrE = dyn_cast<StringLiteral>(E);
450
451 if (StrE) {
Douglas Gregorb5af7382009-02-14 18:57:46 +0000452 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
453 firstDataArg);
Ted Kremenek8c797c02009-01-12 23:09:09 +0000454 return true;
455 }
456
457 return false;
458 }
459 }
460}
461
462
Chris Lattner2e64c072007-08-10 20:18:51 +0000463/// CheckPrintfArguments - Check calls to printf (and similar functions) for
Ted Kremenek081ed872007-08-14 17:39:48 +0000464/// correct use of format strings.
465///
466/// HasVAListArg - A predicate indicating whether the printf-like
/// function is passed an explicit va_list argument (e.g., vprintf)
468///
469/// format_idx - The index into Args for the format string.
470///
471/// Improper format strings to functions in the printf family can be
472/// the source of bizarre bugs and very serious security holes. A
473/// good source of information is available in the following paper
474/// (which includes additional references):
Chris Lattner2e64c072007-08-10 20:18:51 +0000475///
476/// FormatGuard: Automatic Protection From printf Format String
477/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
Ted Kremenek081ed872007-08-14 17:39:48 +0000478///
479/// Functionality implemented:
480///
481/// We can statically check the following properties for string
482/// literal format strings for non v.*printf functions (where the
483/// arguments are passed directly):
484//
485/// (1) Are the number of format conversions equal to the number of
486/// data arguments?
487///
488/// (2) Does each format conversion correctly match the type of the
489/// corresponding data argument? (TODO)
490///
491/// Moreover, for all printf functions we can:
492///
493/// (3) Check for a missing format string (when not caught by type checking).
494///
495/// (4) Check for no-operation flags; e.g. using "#" with format
496/// conversion 'c' (TODO)
497///
498/// (5) Check the use of '%n', a major source of security holes.
499///
500/// (6) Check for malformed format conversions that don't specify anything.
501///
502/// (7) Check for empty format strings. e.g: printf("");
503///
/// (8) Check that the format string is not a wide literal.
505///
Ted Kremenekc2804c22008-03-03 16:50:00 +0000506/// (9) Also check the arguments of functions with the __format__ attribute.
507/// (TODO).
508///
Ted Kremenek081ed872007-08-14 17:39:48 +0000509/// All of these checks can be done by parsing the format string.
510///
511/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
Chris Lattner2e64c072007-08-10 20:18:51 +0000512void
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000513Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000514 unsigned format_idx, unsigned firstDataArg) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000515 Expr *Fn = TheCall->getCallee();
516
Ted Kremenek081ed872007-08-14 17:39:48 +0000517 // CHECK: printf-like function is called with no format string.
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000518 if (format_idx >= TheCall->getNumArgs()) {
Chris Lattner9d2cf082008-11-19 05:27:50 +0000519 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
520 << Fn->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000521 return;
522 }
523
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000524 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
Chris Lattnere65acc12007-08-25 05:36:18 +0000525
Chris Lattner2e64c072007-08-10 20:18:51 +0000526 // CHECK: format string is not a string literal.
527 //
Ted Kremenek081ed872007-08-14 17:39:48 +0000528 // Dynamically generated format strings are difficult to
529 // automatically vet at compile time. Requiring that format strings
530 // are string literals: (1) permits the checking of format strings by
531 // the compiler and thereby (2) can practically remove the source of
532 // many format string exploits.
Ted Kremenek225a14c2008-06-16 18:00:42 +0000533
534 // Format string can be either ObjC string (e.g. @"%d") or
535 // C string (e.g. "%d")
536 // ObjC string uses the same format specifiers as C string, so we can use
537 // the same format string checking logic for both ObjC and C strings.
Douglas Gregorb5af7382009-02-14 18:57:46 +0000538 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall,
539 HasVAListArg, format_idx,
540 firstDataArg);
Ted Kremenek225a14c2008-06-16 18:00:42 +0000541
Ted Kremenek8c797c02009-01-12 23:09:09 +0000542 if (!isFExpr) {
Ted Kremenek19398b62007-12-17 19:03:13 +0000543 // For vprintf* functions (i.e., HasVAListArg==true), we add a
544 // special check to see if the format string is a function parameter
545 // of the function calling the printf function. If the function
546 // has an attribute indicating it is a printf-like function, then we
547 // should suppress warnings concerning non-literals being used in a call
548 // to a vprintf function. For example:
549 //
550 // void
551 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
552 // va_list ap;
553 // va_start(ap, fmt);
554 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
555 // ...
556 //
557 //
558 // FIXME: We don't have full attribute support yet, so just check to see
559 // if the argument is a DeclRefExpr that references a parameter. We'll
560 // add proper support for checking the attribute later.
561 if (HasVAListArg)
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000562 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
563 if (isa<ParmVarDecl>(DR->getDecl()))
Ted Kremenek19398b62007-12-17 19:03:13 +0000564 return;
Ted Kremenek8c797c02009-01-12 23:09:09 +0000565
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000566 Diag(TheCall->getArg(format_idx)->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000567 diag::warn_printf_not_string_constant)
568 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000569 return;
570 }
Ted Kremenek8c797c02009-01-12 23:09:09 +0000571}
Ted Kremenek081ed872007-08-14 17:39:48 +0000572
Ted Kremenek8c797c02009-01-12 23:09:09 +0000573void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
Douglas Gregorb5af7382009-02-14 18:57:46 +0000574 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx,
575 unsigned firstDataArg) {
Ted Kremenek8c797c02009-01-12 23:09:09 +0000576
577 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
Ted Kremenek081ed872007-08-14 17:39:48 +0000578 // CHECK: is the format string a wide literal?
579 if (FExpr->isWide()) {
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000580 Diag(FExpr->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000581 diag::warn_printf_format_string_is_wide_literal)
582 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000583 return;
584 }
585
586 // Str - The format string. NOTE: this is NOT null-terminated!
587 const char * const Str = FExpr->getStrData();
588
589 // CHECK: empty format string?
590 const unsigned StrLen = FExpr->getByteLength();
591
592 if (StrLen == 0) {
Chris Lattner9d2cf082008-11-19 05:27:50 +0000593 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
594 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000595 return;
596 }
597
598 // We process the format string using a binary state machine. The
599 // current state is stored in CurrentState.
600 enum {
601 state_OrdChr,
602 state_Conversion
603 } CurrentState = state_OrdChr;
604
605 // numConversions - The number of conversions seen so far. This is
606 // incremented as we traverse the format string.
607 unsigned numConversions = 0;
608
609 // numDataArgs - The number of data arguments after the format
610 // string. This can only be determined for non vprintf-like
611 // functions. For those functions, this value is 1 (the sole
612 // va_arg argument).
Douglas Gregorb5af7382009-02-14 18:57:46 +0000613 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
Ted Kremenek081ed872007-08-14 17:39:48 +0000614
615 // Inspect the format string.
616 unsigned StrIdx = 0;
617
618 // LastConversionIdx - Index within the format string where we last saw
619 // a '%' character that starts a new format conversion.
620 unsigned LastConversionIdx = 0;
621
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000622 for (; StrIdx < StrLen; ++StrIdx) {
Chris Lattner3d5a8f32007-12-28 05:38:24 +0000623
Ted Kremenek081ed872007-08-14 17:39:48 +0000624 // Is the number of detected conversion conversions greater than
625 // the number of matching data arguments? If so, stop.
626 if (!HasVAListArg && numConversions > numDataArgs) break;
627
628 // Handle "\0"
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000629 if (Str[StrIdx] == '\0') {
Ted Kremenek081ed872007-08-14 17:39:48 +0000630 // The string returned by getStrData() is not null-terminated,
631 // so the presence of a null character is likely an error.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000632 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000633 diag::warn_printf_format_string_contains_null_char)
634 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000635 return;
636 }
637
638 // Ordinary characters (not processing a format conversion).
639 if (CurrentState == state_OrdChr) {
640 if (Str[StrIdx] == '%') {
641 CurrentState = state_Conversion;
642 LastConversionIdx = StrIdx;
643 }
644 continue;
645 }
646
647 // Seen '%'. Now processing a format conversion.
648 switch (Str[StrIdx]) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000649 // Handle dynamic precision or width specifier.
650 case '*': {
651 ++numConversions;
652
653 if (!HasVAListArg && numConversions > numDataArgs) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000654 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
Ted Kremenek035d8792007-10-12 20:51:52 +0000655
Ted Kremenek035d8792007-10-12 20:51:52 +0000656 if (Str[StrIdx-1] == '.')
Chris Lattner9d2cf082008-11-19 05:27:50 +0000657 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
658 << OrigFormatExpr->getSourceRange();
Ted Kremenek035d8792007-10-12 20:51:52 +0000659 else
Chris Lattner9d2cf082008-11-19 05:27:50 +0000660 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
661 << OrigFormatExpr->getSourceRange();
Ted Kremenek035d8792007-10-12 20:51:52 +0000662
Chris Lattner68d88f02007-12-28 05:31:15 +0000663 // Don't do any more checking. We'll just emit spurious errors.
664 return;
Ted Kremenek035d8792007-10-12 20:51:52 +0000665 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000666
667 // Perform type checking on width/precision specifier.
668 Expr *E = TheCall->getArg(format_idx+numConversions);
669 if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
670 if (BT->getKind() == BuiltinType::Int)
671 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000672
Chris Lattnerf17cb362009-02-18 17:49:48 +0000673 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000674
675 if (Str[StrIdx-1] == '.')
Chris Lattner9d2cf082008-11-19 05:27:50 +0000676 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000677 << E->getType() << E->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000678 else
Chris Lattner9d2cf082008-11-19 05:27:50 +0000679 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
Chris Lattner4bfd2232008-11-24 06:25:27 +0000680 << E->getType() << E->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000681
682 break;
683 }
684
685 // Characters which can terminate a format conversion
686 // (e.g. "%d"). Characters that specify length modifiers or
687 // other flags are handled by the default case below.
688 //
689 // FIXME: additional checks will go into the following cases.
690 case 'i':
691 case 'd':
692 case 'o':
693 case 'u':
694 case 'x':
695 case 'X':
696 case 'D':
697 case 'O':
698 case 'U':
699 case 'e':
700 case 'E':
701 case 'f':
702 case 'F':
703 case 'g':
704 case 'G':
705 case 'a':
706 case 'A':
707 case 'c':
708 case 'C':
709 case 'S':
710 case 's':
711 case 'p':
712 ++numConversions;
713 CurrentState = state_OrdChr;
714 break;
715
716 // CHECK: Are we using "%n"? Issue a warning.
717 case 'n': {
718 ++numConversions;
719 CurrentState = state_OrdChr;
Chris Lattnerf17cb362009-02-18 17:49:48 +0000720 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
721 LastConversionIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000722
Chris Lattner9d2cf082008-11-19 05:27:50 +0000723 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000724 break;
725 }
Ted Kremenek225a14c2008-06-16 18:00:42 +0000726
727 // Handle "%@"
728 case '@':
729 // %@ is allowed in ObjC format strings only.
730 if(ObjCFExpr != NULL)
731 CurrentState = state_OrdChr;
732 else {
733 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000734 SourceLocation Loc =
735 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek225a14c2008-06-16 18:00:42 +0000736
Chris Lattner77d52da2008-11-20 06:06:08 +0000737 Diag(Loc, diag::warn_printf_invalid_conversion)
738 << std::string(Str+LastConversionIdx,
739 Str+std::min(LastConversionIdx+2, StrLen))
740 << OrigFormatExpr->getSourceRange();
Ted Kremenek225a14c2008-06-16 18:00:42 +0000741 }
742 ++numConversions;
743 break;
744
Chris Lattner68d88f02007-12-28 05:31:15 +0000745 // Handle "%%"
746 case '%':
747 // Sanity check: Was the first "%" character the previous one?
748 // If not, we will assume that we have a malformed format
749 // conversion, and that the current "%" character is the start
750 // of a new conversion.
751 if (StrIdx - LastConversionIdx == 1)
752 CurrentState = state_OrdChr;
753 else {
754 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000755 SourceLocation Loc =
756 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Chris Lattner68d88f02007-12-28 05:31:15 +0000757
Chris Lattner77d52da2008-11-20 06:06:08 +0000758 Diag(Loc, diag::warn_printf_invalid_conversion)
759 << std::string(Str+LastConversionIdx, Str+StrIdx)
760 << OrigFormatExpr->getSourceRange();
Chris Lattner68d88f02007-12-28 05:31:15 +0000761
762 // This conversion is broken. Advance to the next format
763 // conversion.
764 LastConversionIdx = StrIdx;
765 ++numConversions;
Ted Kremenek081ed872007-08-14 17:39:48 +0000766 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000767 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000768
Chris Lattner68d88f02007-12-28 05:31:15 +0000769 default:
770 // This case catches all other characters: flags, widths, etc.
771 // We should eventually process those as well.
772 break;
Ted Kremenek081ed872007-08-14 17:39:48 +0000773 }
774 }
775
776 if (CurrentState == state_Conversion) {
777 // Issue a warning: invalid format conversion.
Chris Lattnerf17cb362009-02-18 17:49:48 +0000778 SourceLocation Loc =
779 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek081ed872007-08-14 17:39:48 +0000780
Chris Lattner77d52da2008-11-20 06:06:08 +0000781 Diag(Loc, diag::warn_printf_invalid_conversion)
782 << std::string(Str+LastConversionIdx,
783 Str+std::min(LastConversionIdx+2, StrLen))
784 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000785 return;
786 }
787
788 if (!HasVAListArg) {
789 // CHECK: Does the number of format conversions exceed the number
790 // of data arguments?
791 if (numConversions > numDataArgs) {
Chris Lattnerf17cb362009-02-18 17:49:48 +0000792 SourceLocation Loc =
793 getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
Ted Kremenek081ed872007-08-14 17:39:48 +0000794
Chris Lattner9d2cf082008-11-19 05:27:50 +0000795 Diag(Loc, diag::warn_printf_insufficient_data_args)
796 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000797 }
798 // CHECK: Does the number of data arguments exceed the number of
799 // format conversions in the format string?
800 else if (numConversions < numDataArgs)
Chris Lattner83bd5eb2007-12-28 05:29:59 +0000801 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
Chris Lattner9d2cf082008-11-19 05:27:50 +0000802 diag::warn_printf_too_many_data_args)
803 << OrigFormatExpr->getSourceRange();
Ted Kremenek081ed872007-08-14 17:39:48 +0000804 }
805}
Ted Kremenek45925ab2007-08-17 16:46:58 +0000806
807//===--- CHECK: Return Address of Stack Variable --------------------------===//
808
809static DeclRefExpr* EvalVal(Expr *E);
810static DeclRefExpr* EvalAddr(Expr* E);
811
812/// CheckReturnStackAddr - Check if a return statement returns the address
813/// of a stack variable.
814void
815Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
816 SourceLocation ReturnLoc) {
Chris Lattner7a48d9c2008-02-13 01:02:39 +0000817
Ted Kremenek45925ab2007-08-17 16:46:58 +0000818 // Perform checking for returned stack addresses.
Steve Naroffd6163f32008-09-05 22:11:13 +0000819 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000820 if (DeclRefExpr *DR = EvalAddr(RetValExp))
Chris Lattner65cae292008-11-19 08:23:25 +0000821 Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
Chris Lattnerb1753422008-11-23 21:45:46 +0000822 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
Steve Naroff503996b2008-09-16 22:25:10 +0000823
824 // Skip over implicit cast expressions when checking for block expressions.
825 if (ImplicitCastExpr *IcExpr =
826 dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
827 RetValExp = IcExpr->getSubExpr();
828
Steve Naroff3eac7692008-09-10 19:17:48 +0000829 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
Chris Lattner9d2cf082008-11-19 05:27:50 +0000830 Diag(C->getLocStart(), diag::err_ret_local_block)
831 << C->getSourceRange();
Ted Kremenek45925ab2007-08-17 16:46:58 +0000832 }
833 // Perform checking for stack values returned by reference.
834 else if (lhsType->isReferenceType()) {
Douglas Gregor21a04f32008-10-27 19:41:14 +0000835 // Check for a reference to the stack
836 if (DeclRefExpr *DR = EvalVal(RetValExp))
Chris Lattner9d2cf082008-11-19 05:27:50 +0000837 Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
Chris Lattnerb1753422008-11-23 21:45:46 +0000838 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
Ted Kremenek45925ab2007-08-17 16:46:58 +0000839 }
840}
841
842/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
843/// check if the expression in a return statement evaluates to an address
844/// to a location on the stack. The recursion is used to traverse the
845/// AST of the return expression, with recursion backtracking when we
846/// encounter a subexpression that (1) clearly does not lead to the address
847/// of a stack variable or (2) is something we cannot determine leads to
848/// the address of a stack variable based on such local checking.
849///
Ted Kremenekda1300a2007-08-28 17:02:55 +0000850/// EvalAddr processes expressions that are pointers that are used as
851/// references (and not L-values). EvalVal handles all other values.
Ted Kremenek45925ab2007-08-17 16:46:58 +0000852/// At the base case of the recursion is a check for a DeclRefExpr* in
853/// the refers to a stack variable.
854///
855/// This implementation handles:
856///
857/// * pointer-to-pointer casts
858/// * implicit conversions from array references to pointers
859/// * taking the address of fields
860/// * arbitrary interplay between "&" and "*" operators
861/// * pointer arithmetic from an address of a stack variable
862/// * taking the address of an array element where the array is on the stack
863static DeclRefExpr* EvalAddr(Expr *E) {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000864 // We should only be called for evaluating pointer expressions.
Steve Naroffd6163f32008-09-05 22:11:13 +0000865 assert((E->getType()->isPointerType() ||
866 E->getType()->isBlockPointerType() ||
Ted Kremenek42730c52008-01-07 19:49:32 +0000867 E->getType()->isObjCQualifiedIdType()) &&
Chris Lattner68d88f02007-12-28 05:31:15 +0000868 "EvalAddr only works on pointers");
Ted Kremenek45925ab2007-08-17 16:46:58 +0000869
870 // Our "symbolic interpreter" is just a dispatch off the currently
871 // viewed AST node. We then recursively traverse the AST by calling
872 // EvalAddr and EvalVal appropriately.
873 switch (E->getStmtClass()) {
Chris Lattner68d88f02007-12-28 05:31:15 +0000874 case Stmt::ParenExprClass:
875 // Ignore parentheses.
876 return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
Ted Kremenek45925ab2007-08-17 16:46:58 +0000877
Chris Lattner68d88f02007-12-28 05:31:15 +0000878 case Stmt::UnaryOperatorClass: {
879 // The only unary operator that make sense to handle here
880 // is AddrOf. All others don't make sense as pointers.
881 UnaryOperator *U = cast<UnaryOperator>(E);
Ted Kremenek45925ab2007-08-17 16:46:58 +0000882
Chris Lattner68d88f02007-12-28 05:31:15 +0000883 if (U->getOpcode() == UnaryOperator::AddrOf)
884 return EvalVal(U->getSubExpr());
885 else
Ted Kremenek45925ab2007-08-17 16:46:58 +0000886 return NULL;
887 }
Chris Lattner68d88f02007-12-28 05:31:15 +0000888
889 case Stmt::BinaryOperatorClass: {
890 // Handle pointer arithmetic. All other binary operators are not valid
891 // in this context.
892 BinaryOperator *B = cast<BinaryOperator>(E);
893 BinaryOperator::Opcode op = B->getOpcode();
894
895 if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
896 return NULL;
897
898 Expr *Base = B->getLHS();
899
900 // Determine which argument is the real pointer base. It could be
901 // the RHS argument instead of the LHS.
902 if (!Base->getType()->isPointerType()) Base = B->getRHS();
903
904 assert (Base->getType()->isPointerType());
905 return EvalAddr(Base);
906 }
Steve Naroff3eac7692008-09-10 19:17:48 +0000907
Chris Lattner68d88f02007-12-28 05:31:15 +0000908 // For conditional operators we need to see if either the LHS or RHS are
909 // valid DeclRefExpr*s. If one of them is valid, we return it.
910 case Stmt::ConditionalOperatorClass: {
911 ConditionalOperator *C = cast<ConditionalOperator>(E);
912
913 // Handle the GNU extension for missing LHS.
914 if (Expr *lhsExpr = C->getLHS())
915 if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
916 return LHS;
917
918 return EvalAddr(C->getRHS());
919 }
920
Ted Kremenekea19edd2008-08-07 00:49:01 +0000921 // For casts, we need to handle conversions from arrays to
922 // pointer values, and pointer-to-pointer conversions.
Douglas Gregor21a04f32008-10-27 19:41:14 +0000923 case Stmt::ImplicitCastExprClass:
Douglas Gregor035d0882008-10-28 15:36:24 +0000924 case Stmt::CStyleCastExprClass:
Douglas Gregor21a04f32008-10-27 19:41:14 +0000925 case Stmt::CXXFunctionalCastExprClass: {
Argiris Kirtzidisc45e2fb2008-08-18 23:01:59 +0000926 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
Ted Kremenekea19edd2008-08-07 00:49:01 +0000927 QualType T = SubExpr->getType();
928
Steve Naroffd6163f32008-09-05 22:11:13 +0000929 if (SubExpr->getType()->isPointerType() ||
930 SubExpr->getType()->isBlockPointerType() ||
931 SubExpr->getType()->isObjCQualifiedIdType())
Ted Kremenekea19edd2008-08-07 00:49:01 +0000932 return EvalAddr(SubExpr);
933 else if (T->isArrayType())
Chris Lattner68d88f02007-12-28 05:31:15 +0000934 return EvalVal(SubExpr);
Chris Lattner68d88f02007-12-28 05:31:15 +0000935 else
Ted Kremenekea19edd2008-08-07 00:49:01 +0000936 return 0;
Chris Lattner68d88f02007-12-28 05:31:15 +0000937 }
938
939 // C++ casts. For dynamic casts, static casts, and const casts, we
940 // are always converting from a pointer-to-pointer, so we just blow
Douglas Gregor21a04f32008-10-27 19:41:14 +0000941 // through the cast. In the case the dynamic cast doesn't fail (and
942 // return NULL), we take the conservative route and report cases
Chris Lattner68d88f02007-12-28 05:31:15 +0000943 // where we return the address of a stack variable. For Reinterpre
Douglas Gregor21a04f32008-10-27 19:41:14 +0000944 // FIXME: The comment about is wrong; we're not always converting
945 // from pointer to pointer. I'm guessing that this code should also
946 // handle references to objects.
947 case Stmt::CXXStaticCastExprClass:
948 case Stmt::CXXDynamicCastExprClass:
949 case Stmt::CXXConstCastExprClass:
950 case Stmt::CXXReinterpretCastExprClass: {
951 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
Steve Naroffd6163f32008-09-05 22:11:13 +0000952 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
Chris Lattner68d88f02007-12-28 05:31:15 +0000953 return EvalAddr(S);
954 else
955 return NULL;
Chris Lattner68d88f02007-12-28 05:31:15 +0000956 }
957
958 // Everything else: we simply don't reason about them.
959 default:
960 return NULL;
961 }
Ted Kremenek45925ab2007-08-17 16:46:58 +0000962}
963
964
965/// EvalVal - This function is complements EvalAddr in the mutual recursion.
966/// See the comments for EvalAddr for more details.
967static DeclRefExpr* EvalVal(Expr *E) {
968
Ted Kremenekda1300a2007-08-28 17:02:55 +0000969 // We should only be called for evaluating non-pointer expressions, or
970 // expressions with a pointer type that are not used as references but instead
971 // are l-values (e.g., DeclRefExpr with a pointer type).
972
Ted Kremenek45925ab2007-08-17 16:46:58 +0000973 // Our "symbolic interpreter" is just a dispatch off the currently
974 // viewed AST node. We then recursively traverse the AST by calling
975 // EvalAddr and EvalVal appropriately.
976 switch (E->getStmtClass()) {
Douglas Gregor566782a2009-01-06 05:10:23 +0000977 case Stmt::DeclRefExprClass:
978 case Stmt::QualifiedDeclRefExprClass: {
Ted Kremenek45925ab2007-08-17 16:46:58 +0000979 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
980 // at code that refers to a variable's name. We check if it has local
981 // storage within the function, and if so, return the expression.
982 DeclRefExpr *DR = cast<DeclRefExpr>(E);
983
984 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
Douglas Gregor81c29152008-10-29 00:13:59 +0000985 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
Ted Kremenek45925ab2007-08-17 16:46:58 +0000986
987 return NULL;
988 }
989
990 case Stmt::ParenExprClass:
991 // Ignore parentheses.
992 return EvalVal(cast<ParenExpr>(E)->getSubExpr());
993
994 case Stmt::UnaryOperatorClass: {
995 // The only unary operator that make sense to handle here
996 // is Deref. All others don't resolve to a "name." This includes
997 // handling all sorts of rvalues passed to a unary operator.
998 UnaryOperator *U = cast<UnaryOperator>(E);
999
1000 if (U->getOpcode() == UnaryOperator::Deref)
1001 return EvalAddr(U->getSubExpr());
1002
1003 return NULL;
1004 }
1005
1006 case Stmt::ArraySubscriptExprClass: {
1007 // Array subscripts are potential references to data on the stack. We
1008 // retrieve the DeclRefExpr* for the array variable if it indeed
1009 // has local storage.
Ted Kremenek1c1700f2007-08-20 16:18:38 +00001010 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
Ted Kremenek45925ab2007-08-17 16:46:58 +00001011 }
1012
1013 case Stmt::ConditionalOperatorClass: {
1014 // For conditional operators we need to see if either the LHS or RHS are
1015 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
1016 ConditionalOperator *C = cast<ConditionalOperator>(E);
1017
Anders Carlsson37365fc2007-11-30 19:04:31 +00001018 // Handle the GNU extension for missing LHS.
1019 if (Expr *lhsExpr = C->getLHS())
1020 if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1021 return LHS;
1022
1023 return EvalVal(C->getRHS());
Ted Kremenek45925ab2007-08-17 16:46:58 +00001024 }
1025
1026 // Accesses to members are potential references to data on the stack.
1027 case Stmt::MemberExprClass: {
1028 MemberExpr *M = cast<MemberExpr>(E);
1029
1030 // Check for indirect access. We only want direct field accesses.
1031 if (!M->isArrow())
1032 return EvalVal(M->getBase());
1033 else
1034 return NULL;
1035 }
1036
1037 // Everything else: we simply don't reason about them.
1038 default:
1039 return NULL;
1040 }
1041}
Ted Kremenek30c66752007-11-25 00:58:00 +00001042
1043//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1044
1045/// Check for comparisons of floating point operands using != and ==.
1046/// Issue a warning if these are no self-comparisons, as they are not likely
1047/// to do what the programmer intended.
1048void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1049 bool EmitWarning = true;
1050
Ted Kremenek87e30c52008-01-17 16:57:34 +00001051 Expr* LeftExprSansParen = lex->IgnoreParens();
Ted Kremenek24c61682008-01-17 17:55:13 +00001052 Expr* RightExprSansParen = rex->IgnoreParens();
Ted Kremenek30c66752007-11-25 00:58:00 +00001053
1054 // Special case: check for x == x (which is OK).
1055 // Do not emit warnings for such cases.
1056 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1057 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1058 if (DRL->getDecl() == DRR->getDecl())
1059 EmitWarning = false;
1060
Ted Kremenek33159832007-11-29 00:59:04 +00001061
1062 // Special case: check for comparisons against literals that can be exactly
1063 // represented by APFloat. In such cases, do not emit a warning. This
1064 // is a heuristic: often comparison against such literals are used to
1065 // detect if a value in a variable has not changed. This clearly can
1066 // lead to false negatives.
1067 if (EmitWarning) {
1068 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1069 if (FLL->isExact())
1070 EmitWarning = false;
1071 }
1072 else
1073 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1074 if (FLR->isExact())
1075 EmitWarning = false;
1076 }
1077 }
1078
Ted Kremenek30c66752007-11-25 00:58:00 +00001079 // Check for comparisons with builtin types.
Sebastian Redl8b769972009-01-19 00:08:26 +00001080 if (EmitWarning)
Ted Kremenek30c66752007-11-25 00:58:00 +00001081 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
Douglas Gregorb5af7382009-02-14 18:57:46 +00001082 if (CL->isBuiltinCall(Context))
Ted Kremenek30c66752007-11-25 00:58:00 +00001083 EmitWarning = false;
1084
Sebastian Redl8b769972009-01-19 00:08:26 +00001085 if (EmitWarning)
Ted Kremenek30c66752007-11-25 00:58:00 +00001086 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
Douglas Gregorb5af7382009-02-14 18:57:46 +00001087 if (CR->isBuiltinCall(Context))
Ted Kremenek30c66752007-11-25 00:58:00 +00001088 EmitWarning = false;
1089
1090 // Emit the diagnostic.
1091 if (EmitWarning)
Chris Lattner8ba580c2008-11-19 05:08:23 +00001092 Diag(loc, diag::warn_floatingpoint_eq)
1093 << lex->getSourceRange() << rex->getSourceRange();
Ted Kremenek30c66752007-11-25 00:58:00 +00001094}