| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 1 | //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
| Chris Lattner | 0bc735f | 2007-12-29 19:59:25 +0000 | [diff] [blame] | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | //  This file implements extra semantic analysis beyond what is enforced | 
|  | 11 | //  by the C type system. | 
|  | 12 | // | 
|  | 13 | //===----------------------------------------------------------------------===// | 
|  | 14 |  | 
|  | 15 | #include "Sema.h" | 
|  | 16 | #include "clang/AST/ASTContext.h" | 
|  | 17 | #include "clang/AST/Decl.h" | 
|  | 18 | #include "clang/AST/Expr.h" | 
| Ted Kremenek | 2324512 | 2007-08-20 16:18:38 +0000 | [diff] [blame] | 19 | #include "clang/AST/ExprCXX.h" | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 20 | #include "clang/Lex/Preprocessor.h" | 
|  | 21 | #include "clang/Lex/LiteralSupport.h" | 
|  | 22 | #include "clang/Basic/SourceManager.h" | 
|  | 23 | #include "clang/Basic/Diagnostic.h" | 
|  | 24 | #include "clang/Basic/LangOptions.h" | 
|  | 25 | #include "clang/Basic/TargetInfo.h" | 
|  | 26 | #include "llvm/ADT/SmallString.h" | 
|  | 27 | #include "llvm/ADT/StringExtras.h" | 
| Ted Kremenek | 588e5eb | 2007-11-25 00:58:00 +0000 | [diff] [blame] | 28 | #include "SemaUtil.h" | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 29 | using namespace clang; | 
|  | 30 |  | 
|  | 31 | /// CheckFunctionCall - Check a direct function call for various correctness | 
|  | 32 | /// and safety properties not strictly enforced by the C type system. | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 33 | bool | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 34 | Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 35 |  | 
|  | 36 | // Get the IdentifierInfo* for the called function. | 
|  | 37 | IdentifierInfo *FnInfo = FDecl->getIdentifier(); | 
|  | 38 |  | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 39 | switch (FnInfo->getBuiltinID()) { | 
|  | 40 | case Builtin::BI__builtin___CFStringMakeConstantString: | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 41 | assert(TheCall->getNumArgs() == 1 && | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 42 | "Wrong # arguments to builtin CFStringMakeConstantString"); | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 43 | return CheckBuiltinCFStringArgument(TheCall->getArg(0)); | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 44 | case Builtin::BI__builtin_va_start: | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 45 | return SemaBuiltinVAStart(TheCall); | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 46 |  | 
|  | 47 | case Builtin::BI__builtin_isgreater: | 
|  | 48 | case Builtin::BI__builtin_isgreaterequal: | 
|  | 49 | case Builtin::BI__builtin_isless: | 
|  | 50 | case Builtin::BI__builtin_islessequal: | 
|  | 51 | case Builtin::BI__builtin_islessgreater: | 
|  | 52 | case Builtin::BI__builtin_isunordered: | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 53 | return SemaBuiltinUnorderedCompare(TheCall); | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 54 | } | 
|  | 55 |  | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 56 | // Search the KnownFunctionIDs for the identifier. | 
|  | 57 | unsigned i = 0, e = id_num_known_functions; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 58 | for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } | 
| Anders Carlsson | 9cdc4d3 | 2007-08-17 15:44:17 +0000 | [diff] [blame] | 59 | if (i == e) return false; | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 60 |  | 
|  | 61 | // Printf checking. | 
|  | 62 | if (i <= id_vprintf) { | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 63 | // Retrieve the index of the format string parameter and determine | 
|  | 64 | // if the function is passed a va_arg argument. | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 65 | unsigned format_idx = 0; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 66 | bool HasVAListArg = false; | 
|  | 67 |  | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 68 | switch (i) { | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 69 | default: assert(false && "No format string argument index."); | 
|  | 70 | case id_printf:    format_idx = 0; break; | 
|  | 71 | case id_fprintf:   format_idx = 1; break; | 
|  | 72 | case id_sprintf:   format_idx = 1; break; | 
|  | 73 | case id_snprintf:  format_idx = 2; break; | 
|  | 74 | case id_asprintf:  format_idx = 1; break; | 
|  | 75 | case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; | 
|  | 76 | case id_vasprintf: format_idx = 1; HasVAListArg = true; break; | 
|  | 77 | case id_vfprintf:  format_idx = 1; HasVAListArg = true; break; | 
|  | 78 | case id_vsprintf:  format_idx = 1; HasVAListArg = true; break; | 
|  | 79 | case id_vprintf:   format_idx = 0; HasVAListArg = true; break; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 80 | } | 
|  | 81 |  | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 82 | CheckPrintfArguments(TheCall, HasVAListArg, format_idx); | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 83 | } | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 84 |  | 
| Anders Carlsson | 9cdc4d3 | 2007-08-17 15:44:17 +0000 | [diff] [blame] | 85 | return false; | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 86 | } | 
|  | 87 |  | 
|  | 88 | /// CheckBuiltinCFStringArgument - Checks that the argument to the builtin | 
|  | 89 | /// CFString constructor is correct | 
| Chris Lattner | cc6f65d | 2007-08-25 05:30:33 +0000 | [diff] [blame] | 90 | bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { | 
| Chris Lattner | 998568f | 2007-12-28 05:38:24 +0000 | [diff] [blame] | 91 | Arg = IgnoreParenCasts(Arg); | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 92 |  | 
|  | 93 | StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); | 
|  | 94 |  | 
|  | 95 | if (!Literal || Literal->isWide()) { | 
|  | 96 | Diag(Arg->getLocStart(), | 
|  | 97 | diag::err_cfstring_literal_not_string_constant, | 
|  | 98 | Arg->getSourceRange()); | 
| Anders Carlsson | 9cdc4d3 | 2007-08-17 15:44:17 +0000 | [diff] [blame] | 99 | return true; | 
| Anders Carlsson | 71993dd | 2007-08-17 05:31:46 +0000 | [diff] [blame] | 100 | } | 
|  | 101 |  | 
|  | 102 | const char *Data = Literal->getStrData(); | 
|  | 103 | unsigned Length = Literal->getByteLength(); | 
|  | 104 |  | 
|  | 105 | for (unsigned i = 0; i < Length; ++i) { | 
|  | 106 | if (!isascii(Data[i])) { | 
|  | 107 | Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), | 
|  | 108 | diag::warn_cfstring_literal_contains_non_ascii_character, | 
|  | 109 | Arg->getSourceRange()); | 
|  | 110 | break; | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | if (!Data[i]) { | 
|  | 114 | Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), | 
|  | 115 | diag::warn_cfstring_literal_contains_nul_character, | 
|  | 116 | Arg->getSourceRange()); | 
|  | 117 | break; | 
|  | 118 | } | 
|  | 119 | } | 
|  | 120 |  | 
| Anders Carlsson | 9cdc4d3 | 2007-08-17 15:44:17 +0000 | [diff] [blame] | 121 | return false; | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 122 | } | 
|  | 123 |  | 
| Chris Lattner | c27c665 | 2007-12-20 00:05:45 +0000 | [diff] [blame] | 124 | /// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. | 
|  | 125 | /// Emit an error and return true on failure, return false on success. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 126 | bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { | 
|  | 127 | Expr *Fn = TheCall->getCallee(); | 
|  | 128 | if (TheCall->getNumArgs() > 2) { | 
|  | 129 | Diag(TheCall->getArg(2)->getLocStart(), | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 130 | diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 131 | SourceRange(TheCall->getArg(2)->getLocStart(), | 
|  | 132 | (*(TheCall->arg_end()-1))->getLocEnd())); | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 133 | return true; | 
|  | 134 | } | 
|  | 135 |  | 
| Chris Lattner | c27c665 | 2007-12-20 00:05:45 +0000 | [diff] [blame] | 136 | // Determine whether the current function is variadic or not. | 
|  | 137 | bool isVariadic; | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 138 | if (CurFunctionDecl) | 
| Chris Lattner | c27c665 | 2007-12-20 00:05:45 +0000 | [diff] [blame] | 139 | isVariadic = | 
|  | 140 | cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic(); | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 141 | else | 
| Chris Lattner | c27c665 | 2007-12-20 00:05:45 +0000 | [diff] [blame] | 142 | isVariadic = CurMethodDecl->isVariadic(); | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 143 |  | 
| Chris Lattner | c27c665 | 2007-12-20 00:05:45 +0000 | [diff] [blame] | 144 | if (!isVariadic) { | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 145 | Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); | 
|  | 146 | return true; | 
|  | 147 | } | 
|  | 148 |  | 
|  | 149 | // Verify that the second argument to the builtin is the last argument of the | 
|  | 150 | // current function or method. | 
|  | 151 | bool SecondArgIsLastNamedArgument = false; | 
| Anders Carlsson | 88cf226 | 2008-02-11 04:20:54 +0000 | [diff] [blame] | 152 | const Expr *Arg = TheCall->getArg(1); | 
|  | 153 | while (1) { | 
|  | 154 | if (const ParenExpr *PE = dyn_cast<ParenExpr>(Arg)) | 
|  | 155 | Arg = PE->getSubExpr(); | 
|  | 156 | else if (const ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(Arg)) | 
|  | 157 | Arg = CE->getSubExpr(); | 
|  | 158 | else | 
|  | 159 | break; | 
|  | 160 | } | 
|  | 161 |  | 
|  | 162 | if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { | 
|  | 163 | if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 164 | // FIXME: This isn't correct for methods (results in bogus warning). | 
|  | 165 | // Get the last formal in the current function. | 
| Anders Carlsson | 88cf226 | 2008-02-11 04:20:54 +0000 | [diff] [blame] | 166 | const ParmVarDecl *LastArg; | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 167 | if (CurFunctionDecl) | 
|  | 168 | LastArg = *(CurFunctionDecl->param_end()-1); | 
|  | 169 | else | 
|  | 170 | LastArg = *(CurMethodDecl->param_end()-1); | 
|  | 171 | SecondArgIsLastNamedArgument = PV == LastArg; | 
|  | 172 | } | 
|  | 173 | } | 
|  | 174 |  | 
|  | 175 | if (!SecondArgIsLastNamedArgument) | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 176 | Diag(TheCall->getArg(1)->getLocStart(), | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 177 | diag::warn_second_parameter_of_va_start_not_last_named_argument); | 
|  | 178 | return false; | 
|  | 179 | } | 
|  | 180 |  | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 181 | /// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and | 
|  | 182 | /// friends.  This is declared to take (...), so we have to check everything. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 183 | bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { | 
|  | 184 | if (TheCall->getNumArgs() < 2) | 
|  | 185 | return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); | 
|  | 186 | if (TheCall->getNumArgs() > 2) | 
|  | 187 | return Diag(TheCall->getArg(2)->getLocStart(), | 
|  | 188 | diag::err_typecheck_call_too_many_args, | 
|  | 189 | SourceRange(TheCall->getArg(2)->getLocStart(), | 
|  | 190 | (*(TheCall->arg_end()-1))->getLocEnd())); | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 191 |  | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 192 | Expr *OrigArg0 = TheCall->getArg(0); | 
|  | 193 | Expr *OrigArg1 = TheCall->getArg(1); | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 194 |  | 
|  | 195 | // Do standard promotions between the two arguments, returning their common | 
|  | 196 | // type. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 197 | QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 198 |  | 
|  | 199 | // If the common type isn't a real floating type, then the arguments were | 
|  | 200 | // invalid for this operation. | 
|  | 201 | if (!Res->isRealFloatingType()) | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 202 | return Diag(OrigArg0->getLocStart(), | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 203 | diag::err_typecheck_call_invalid_ordered_compare, | 
|  | 204 | OrigArg0->getType().getAsString(), | 
|  | 205 | OrigArg1->getType().getAsString(), | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 206 | SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); | 
| Chris Lattner | 1b9a079 | 2007-12-20 00:26:33 +0000 | [diff] [blame] | 207 |  | 
|  | 208 | return false; | 
|  | 209 | } | 
|  | 210 |  | 
| Chris Lattner | 30ce344 | 2007-12-19 23:59:04 +0000 | [diff] [blame] | 211 |  | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 212 | /// CheckPrintfArguments - Check calls to printf (and similar functions) for | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 213 | /// correct use of format strings. | 
|  | 214 | /// | 
|  | 215 | ///  HasVAListArg - A predicate indicating whether the printf-like | 
|  | 216 | ///    function is passed an explicit va_arg argument (e.g., vprintf) | 
|  | 217 | /// | 
|  | 218 | ///  format_idx - The index into Args for the format string. | 
|  | 219 | /// | 
|  | 220 | /// Improper format strings to functions in the printf family can be | 
|  | 221 | /// the source of bizarre bugs and very serious security holes.  A | 
|  | 222 | /// good source of information is available in the following paper | 
|  | 223 | /// (which includes additional references): | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 224 | /// | 
|  | 225 | ///  FormatGuard: Automatic Protection From printf Format String | 
|  | 226 | ///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 227 | /// | 
|  | 228 | /// Functionality implemented: | 
|  | 229 | /// | 
|  | 230 | ///  We can statically check the following properties for string | 
|  | 231 | ///  literal format strings for non v.*printf functions (where the | 
|  | 232 | ///  arguments are passed directly): | 
|  | 233 | // | 
|  | 234 | ///  (1) Are the number of format conversions equal to the number of | 
|  | 235 | ///      data arguments? | 
|  | 236 | /// | 
|  | 237 | ///  (2) Does each format conversion correctly match the type of the | 
|  | 238 | ///      corresponding data argument?  (TODO) | 
|  | 239 | /// | 
|  | 240 | /// Moreover, for all printf functions we can: | 
|  | 241 | /// | 
|  | 242 | ///  (3) Check for a missing format string (when not caught by type checking). | 
|  | 243 | /// | 
|  | 244 | ///  (4) Check for no-operation flags; e.g. using "#" with format | 
|  | 245 | ///      conversion 'c'  (TODO) | 
|  | 246 | /// | 
|  | 247 | ///  (5) Check the use of '%n', a major source of security holes. | 
|  | 248 | /// | 
|  | 249 | ///  (6) Check for malformed format conversions that don't specify anything. | 
|  | 250 | /// | 
|  | 251 | ///  (7) Check for empty format strings.  e.g: printf(""); | 
|  | 252 | /// | 
|  | 253 | ///  (8) Check that the format string is a wide literal. | 
|  | 254 | /// | 
|  | 255 | /// All of these checks can be done by parsing the format string. | 
|  | 256 | /// | 
|  | 257 | /// For now, we ONLY do (1), (3), (5), (6), (7), and (8). | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 258 | void | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 259 | Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, | 
|  | 260 | unsigned format_idx) { | 
|  | 261 | Expr *Fn = TheCall->getCallee(); | 
|  | 262 |  | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 263 | // CHECK: printf-like function is called with no format string. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 264 | if (format_idx >= TheCall->getNumArgs()) { | 
|  | 265 | Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 266 | Fn->getSourceRange()); | 
|  | 267 | return; | 
|  | 268 | } | 
|  | 269 |  | 
| Chris Lattner | 998568f | 2007-12-28 05:38:24 +0000 | [diff] [blame] | 270 | Expr *OrigFormatExpr = IgnoreParenCasts(TheCall->getArg(format_idx)); | 
| Chris Lattner | 459e848 | 2007-08-25 05:36:18 +0000 | [diff] [blame] | 271 |  | 
| Chris Lattner | 59907c4 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 272 | // CHECK: format string is not a string literal. | 
|  | 273 | // | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 274 | // Dynamically generated format strings are difficult to | 
|  | 275 | // automatically vet at compile time.  Requiring that format strings | 
|  | 276 | // are string literals: (1) permits the checking of format strings by | 
|  | 277 | // the compiler and thereby (2) can practically remove the source of | 
|  | 278 | // many format string exploits. | 
| Chris Lattner | 459e848 | 2007-08-25 05:36:18 +0000 | [diff] [blame] | 279 | StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 280 | if (FExpr == NULL) { | 
| Ted Kremenek | 4a33646 | 2007-12-17 19:03:13 +0000 | [diff] [blame] | 281 | // For vprintf* functions (i.e., HasVAListArg==true), we add a | 
|  | 282 | // special check to see if the format string is a function parameter | 
|  | 283 | // of the function calling the printf function.  If the function | 
|  | 284 | // has an attribute indicating it is a printf-like function, then we | 
|  | 285 | // should suppress warnings concerning non-literals being used in a call | 
|  | 286 | // to a vprintf function.  For example: | 
|  | 287 | // | 
|  | 288 | // void | 
|  | 289 | // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { | 
|  | 290 | //      va_list ap; | 
|  | 291 | //      va_start(ap, fmt); | 
|  | 292 | //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt". | 
|  | 293 | //      ... | 
|  | 294 | // | 
|  | 295 | // | 
|  | 296 | //  FIXME: We don't have full attribute support yet, so just check to see | 
|  | 297 | //    if the argument is a DeclRefExpr that references a parameter.  We'll | 
|  | 298 | //    add proper support for checking the attribute later. | 
|  | 299 | if (HasVAListArg) | 
| Chris Lattner | 998568f | 2007-12-28 05:38:24 +0000 | [diff] [blame] | 300 | if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) | 
|  | 301 | if (isa<ParmVarDecl>(DR->getDecl())) | 
| Ted Kremenek | 4a33646 | 2007-12-17 19:03:13 +0000 | [diff] [blame] | 302 | return; | 
|  | 303 |  | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 304 | Diag(TheCall->getArg(format_idx)->getLocStart(), | 
|  | 305 | diag::warn_printf_not_string_constant, Fn->getSourceRange()); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 306 | return; | 
|  | 307 | } | 
|  | 308 |  | 
|  | 309 | // CHECK: is the format string a wide literal? | 
|  | 310 | if (FExpr->isWide()) { | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 311 | Diag(FExpr->getLocStart(), | 
|  | 312 | diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 313 | return; | 
|  | 314 | } | 
|  | 315 |  | 
|  | 316 | // Str - The format string.  NOTE: this is NOT null-terminated! | 
|  | 317 | const char * const Str = FExpr->getStrData(); | 
|  | 318 |  | 
|  | 319 | // CHECK: empty format string? | 
|  | 320 | const unsigned StrLen = FExpr->getByteLength(); | 
|  | 321 |  | 
|  | 322 | if (StrLen == 0) { | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 323 | Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, | 
|  | 324 | Fn->getSourceRange()); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 325 | return; | 
|  | 326 | } | 
|  | 327 |  | 
|  | 328 | // We process the format string using a binary state machine.  The | 
|  | 329 | // current state is stored in CurrentState. | 
|  | 330 | enum { | 
|  | 331 | state_OrdChr, | 
|  | 332 | state_Conversion | 
|  | 333 | } CurrentState = state_OrdChr; | 
|  | 334 |  | 
|  | 335 | // numConversions - The number of conversions seen so far.  This is | 
|  | 336 | //  incremented as we traverse the format string. | 
|  | 337 | unsigned numConversions = 0; | 
|  | 338 |  | 
|  | 339 | // numDataArgs - The number of data arguments after the format | 
|  | 340 | //  string.  This can only be determined for non vprintf-like | 
|  | 341 | //  functions.  For those functions, this value is 1 (the sole | 
|  | 342 | //  va_arg argument). | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 343 | unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 344 |  | 
|  | 345 | // Inspect the format string. | 
|  | 346 | unsigned StrIdx = 0; | 
|  | 347 |  | 
|  | 348 | // LastConversionIdx - Index within the format string where we last saw | 
|  | 349 | //  a '%' character that starts a new format conversion. | 
|  | 350 | unsigned LastConversionIdx = 0; | 
|  | 351 |  | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 352 | for (; StrIdx < StrLen; ++StrIdx) { | 
| Chris Lattner | 998568f | 2007-12-28 05:38:24 +0000 | [diff] [blame] | 353 |  | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 354 | // Is the number of detected conversion conversions greater than | 
|  | 355 | // the number of matching data arguments?  If so, stop. | 
|  | 356 | if (!HasVAListArg && numConversions > numDataArgs) break; | 
|  | 357 |  | 
|  | 358 | // Handle "\0" | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 359 | if (Str[StrIdx] == '\0') { | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 360 | // The string returned by getStrData() is not null-terminated, | 
|  | 361 | // so the presence of a null character is likely an error. | 
| Chris Lattner | 998568f | 2007-12-28 05:38:24 +0000 | [diff] [blame] | 362 | Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), | 
|  | 363 | diag::warn_printf_format_string_contains_null_char, | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 364 | Fn->getSourceRange()); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 365 | return; | 
|  | 366 | } | 
|  | 367 |  | 
|  | 368 | // Ordinary characters (not processing a format conversion). | 
|  | 369 | if (CurrentState == state_OrdChr) { | 
|  | 370 | if (Str[StrIdx] == '%') { | 
|  | 371 | CurrentState = state_Conversion; | 
|  | 372 | LastConversionIdx = StrIdx; | 
|  | 373 | } | 
|  | 374 | continue; | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | // Seen '%'.  Now processing a format conversion. | 
|  | 378 | switch (Str[StrIdx]) { | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 379 | // Handle dynamic precision or width specifier. | 
|  | 380 | case '*': { | 
|  | 381 | ++numConversions; | 
|  | 382 |  | 
|  | 383 | if (!HasVAListArg && numConversions > numDataArgs) { | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 384 | SourceLocation Loc = FExpr->getLocStart(); | 
|  | 385 | Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); | 
| Ted Kremenek | 580b664 | 2007-10-12 20:51:52 +0000 | [diff] [blame] | 386 |  | 
| Ted Kremenek | 580b664 | 2007-10-12 20:51:52 +0000 | [diff] [blame] | 387 | if (Str[StrIdx-1] == '.') | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 388 | Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, | 
|  | 389 | Fn->getSourceRange()); | 
| Ted Kremenek | 580b664 | 2007-10-12 20:51:52 +0000 | [diff] [blame] | 390 | else | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 391 | Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, | 
|  | 392 | Fn->getSourceRange()); | 
| Ted Kremenek | 580b664 | 2007-10-12 20:51:52 +0000 | [diff] [blame] | 393 |  | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 394 | // Don't do any more checking.  We'll just emit spurious errors. | 
|  | 395 | return; | 
| Ted Kremenek | 580b664 | 2007-10-12 20:51:52 +0000 | [diff] [blame] | 396 | } | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 397 |  | 
|  | 398 | // Perform type checking on width/precision specifier. | 
|  | 399 | Expr *E = TheCall->getArg(format_idx+numConversions); | 
|  | 400 | if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) | 
|  | 401 | if (BT->getKind() == BuiltinType::Int) | 
|  | 402 | break; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 403 |  | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 404 | SourceLocation Loc = | 
|  | 405 | PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); | 
|  | 406 |  | 
|  | 407 | if (Str[StrIdx-1] == '.') | 
|  | 408 | Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, | 
|  | 409 | E->getType().getAsString(), E->getSourceRange()); | 
|  | 410 | else | 
|  | 411 | Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, | 
|  | 412 | E->getType().getAsString(), E->getSourceRange()); | 
|  | 413 |  | 
|  | 414 | break; | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | // Characters which can terminate a format conversion | 
|  | 418 | // (e.g. "%d").  Characters that specify length modifiers or | 
|  | 419 | // other flags are handled by the default case below. | 
|  | 420 | // | 
|  | 421 | // FIXME: additional checks will go into the following cases. | 
|  | 422 | case 'i': | 
|  | 423 | case 'd': | 
|  | 424 | case 'o': | 
|  | 425 | case 'u': | 
|  | 426 | case 'x': | 
|  | 427 | case 'X': | 
|  | 428 | case 'D': | 
|  | 429 | case 'O': | 
|  | 430 | case 'U': | 
|  | 431 | case 'e': | 
|  | 432 | case 'E': | 
|  | 433 | case 'f': | 
|  | 434 | case 'F': | 
|  | 435 | case 'g': | 
|  | 436 | case 'G': | 
|  | 437 | case 'a': | 
|  | 438 | case 'A': | 
|  | 439 | case 'c': | 
|  | 440 | case 'C': | 
|  | 441 | case 'S': | 
|  | 442 | case 's': | 
|  | 443 | case 'p': | 
|  | 444 | ++numConversions; | 
|  | 445 | CurrentState = state_OrdChr; | 
|  | 446 | break; | 
|  | 447 |  | 
|  | 448 | // CHECK: Are we using "%n"?  Issue a warning. | 
|  | 449 | case 'n': { | 
|  | 450 | ++numConversions; | 
|  | 451 | CurrentState = state_OrdChr; | 
|  | 452 | SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), | 
|  | 453 | LastConversionIdx+1); | 
|  | 454 |  | 
|  | 455 | Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); | 
|  | 456 | break; | 
|  | 457 | } | 
|  | 458 |  | 
|  | 459 | // Handle "%%" | 
|  | 460 | case '%': | 
|  | 461 | // Sanity check: Was the first "%" character the previous one? | 
|  | 462 | // If not, we will assume that we have a malformed format | 
|  | 463 | // conversion, and that the current "%" character is the start | 
|  | 464 | // of a new conversion. | 
|  | 465 | if (StrIdx - LastConversionIdx == 1) | 
|  | 466 | CurrentState = state_OrdChr; | 
|  | 467 | else { | 
|  | 468 | // Issue a warning: invalid format conversion. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 469 | SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), | 
|  | 470 | LastConversionIdx+1); | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 471 |  | 
|  | 472 | Diag(Loc, diag::warn_printf_invalid_conversion, | 
|  | 473 | std::string(Str+LastConversionIdx, Str+StrIdx), | 
|  | 474 | Fn->getSourceRange()); | 
|  | 475 |  | 
|  | 476 | // This conversion is broken.  Advance to the next format | 
|  | 477 | // conversion. | 
|  | 478 | LastConversionIdx = StrIdx; | 
|  | 479 | ++numConversions; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 480 | } | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 481 | break; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 482 |  | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 483 | default: | 
|  | 484 | // This case catches all other characters: flags, widths, etc. | 
|  | 485 | // We should eventually process those as well. | 
|  | 486 | break; | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 487 | } | 
|  | 488 | } | 
|  | 489 |  | 
|  | 490 | if (CurrentState == state_Conversion) { | 
|  | 491 | // Issue a warning: invalid format conversion. | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 492 | SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), | 
|  | 493 | LastConversionIdx+1); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 494 |  | 
|  | 495 | Diag(Loc, diag::warn_printf_invalid_conversion, | 
| Chris Lattner | a9e2ea1 | 2007-08-26 17:38:22 +0000 | [diff] [blame] | 496 | std::string(Str+LastConversionIdx, | 
|  | 497 | Str+std::min(LastConversionIdx+2, StrLen)), | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 498 | Fn->getSourceRange()); | 
|  | 499 | return; | 
|  | 500 | } | 
|  | 501 |  | 
|  | 502 | if (!HasVAListArg) { | 
|  | 503 | // CHECK: Does the number of format conversions exceed the number | 
|  | 504 | //        of data arguments? | 
|  | 505 | if (numConversions > numDataArgs) { | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 506 | SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), | 
|  | 507 | LastConversionIdx); | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 508 |  | 
|  | 509 | Diag(Loc, diag::warn_printf_insufficient_data_args, | 
|  | 510 | Fn->getSourceRange()); | 
|  | 511 | } | 
|  | 512 | // CHECK: Does the number of data arguments exceed the number of | 
|  | 513 | //        format conversions in the format string? | 
|  | 514 | else if (numConversions < numDataArgs) | 
| Chris Lattner | 925e60d | 2007-12-28 05:29:59 +0000 | [diff] [blame] | 515 | Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), | 
| Ted Kremenek | 71895b9 | 2007-08-14 17:39:48 +0000 | [diff] [blame] | 516 | diag::warn_printf_too_many_data_args, Fn->getSourceRange()); | 
|  | 517 | } | 
|  | 518 | } | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 519 |  | 
|  | 520 | //===--- CHECK: Return Address of Stack Variable --------------------------===// | 
|  | 521 |  | 
|  | 522 | static DeclRefExpr* EvalVal(Expr *E); | 
|  | 523 | static DeclRefExpr* EvalAddr(Expr* E); | 
|  | 524 |  | 
|  | 525 | /// CheckReturnStackAddr - Check if a return statement returns the address | 
|  | 526 | ///   of a stack variable. | 
|  | 527 | void | 
|  | 528 | Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, | 
|  | 529 | SourceLocation ReturnLoc) { | 
|  | 530 |  | 
|  | 531 | // Perform checking for returned stack addresses. | 
|  | 532 | if (lhsType->isPointerType()) { | 
|  | 533 | if (DeclRefExpr *DR = EvalAddr(RetValExp)) | 
|  | 534 | Diag(DR->getLocStart(), diag::warn_ret_stack_addr, | 
|  | 535 | DR->getDecl()->getIdentifier()->getName(), | 
|  | 536 | RetValExp->getSourceRange()); | 
|  | 537 | } | 
|  | 538 | // Perform checking for stack values returned by reference. | 
|  | 539 | else if (lhsType->isReferenceType()) { | 
| Ted Kremenek | 96eabe0 | 2007-08-27 16:39:17 +0000 | [diff] [blame] | 540 | // Check for an implicit cast to a reference. | 
|  | 541 | if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) | 
|  | 542 | if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) | 
|  | 543 | Diag(DR->getLocStart(), diag::warn_ret_stack_ref, | 
|  | 544 | DR->getDecl()->getIdentifier()->getName(), | 
|  | 545 | RetValExp->getSourceRange()); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 546 | } | 
|  | 547 | } | 
|  | 548 |  | 
|  | 549 | /// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that | 
|  | 550 | ///  check if the expression in a return statement evaluates to an address | 
|  | 551 | ///  to a location on the stack.  The recursion is used to traverse the | 
|  | 552 | ///  AST of the return expression, with recursion backtracking when we | 
|  | 553 | ///  encounter a subexpression that (1) clearly does not lead to the address | 
|  | 554 | ///  of a stack variable or (2) is something we cannot determine leads to | 
|  | 555 | ///  the address of a stack variable based on such local checking. | 
|  | 556 | /// | 
| Ted Kremenek | e8c600f | 2007-08-28 17:02:55 +0000 | [diff] [blame] | 557 | ///  EvalAddr processes expressions that are pointers that are used as | 
|  | 558 | ///  references (and not L-values).  EvalVal handles all other values. | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 559 | ///  At the base case of the recursion is a check for a DeclRefExpr* in | 
|  | 560 | ///  the refers to a stack variable. | 
|  | 561 | /// | 
|  | 562 | ///  This implementation handles: | 
|  | 563 | /// | 
|  | 564 | ///   * pointer-to-pointer casts | 
|  | 565 | ///   * implicit conversions from array references to pointers | 
|  | 566 | ///   * taking the address of fields | 
|  | 567 | ///   * arbitrary interplay between "&" and "*" operators | 
|  | 568 | ///   * pointer arithmetic from an address of a stack variable | 
|  | 569 | ///   * taking the address of an array element where the array is on the stack | 
|  | 570 | static DeclRefExpr* EvalAddr(Expr *E) { | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 571 | // We should only be called for evaluating pointer expressions. | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 572 | assert((E->getType()->isPointerType() || | 
| Ted Kremenek | a526c5c | 2008-01-07 19:49:32 +0000 | [diff] [blame] | 573 | E->getType()->isObjCQualifiedIdType()) && | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 574 | "EvalAddr only works on pointers"); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 575 |  | 
|  | 576 | // Our "symbolic interpreter" is just a dispatch off the currently | 
|  | 577 | // viewed AST node.  We then recursively traverse the AST by calling | 
|  | 578 | // EvalAddr and EvalVal appropriately. | 
|  | 579 | switch (E->getStmtClass()) { | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 580 | case Stmt::ParenExprClass: | 
|  | 581 | // Ignore parentheses. | 
|  | 582 | return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 583 |  | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 584 | case Stmt::UnaryOperatorClass: { | 
|  | 585 | // The only unary operator that make sense to handle here | 
|  | 586 | // is AddrOf.  All others don't make sense as pointers. | 
|  | 587 | UnaryOperator *U = cast<UnaryOperator>(E); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 588 |  | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 589 | if (U->getOpcode() == UnaryOperator::AddrOf) | 
|  | 590 | return EvalVal(U->getSubExpr()); | 
|  | 591 | else | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 592 | return NULL; | 
|  | 593 | } | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 594 |  | 
|  | 595 | case Stmt::BinaryOperatorClass: { | 
|  | 596 | // Handle pointer arithmetic.  All other binary operators are not valid | 
|  | 597 | // in this context. | 
|  | 598 | BinaryOperator *B = cast<BinaryOperator>(E); | 
|  | 599 | BinaryOperator::Opcode op = B->getOpcode(); | 
|  | 600 |  | 
|  | 601 | if (op != BinaryOperator::Add && op != BinaryOperator::Sub) | 
|  | 602 | return NULL; | 
|  | 603 |  | 
|  | 604 | Expr *Base = B->getLHS(); | 
|  | 605 |  | 
|  | 606 | // Determine which argument is the real pointer base.  It could be | 
|  | 607 | // the RHS argument instead of the LHS. | 
|  | 608 | if (!Base->getType()->isPointerType()) Base = B->getRHS(); | 
|  | 609 |  | 
|  | 610 | assert (Base->getType()->isPointerType()); | 
|  | 611 | return EvalAddr(Base); | 
|  | 612 | } | 
|  | 613 |  | 
|  | 614 | // For conditional operators we need to see if either the LHS or RHS are | 
|  | 615 | // valid DeclRefExpr*s.  If one of them is valid, we return it. | 
|  | 616 | case Stmt::ConditionalOperatorClass: { | 
|  | 617 | ConditionalOperator *C = cast<ConditionalOperator>(E); | 
|  | 618 |  | 
|  | 619 | // Handle the GNU extension for missing LHS. | 
|  | 620 | if (Expr *lhsExpr = C->getLHS()) | 
|  | 621 | if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) | 
|  | 622 | return LHS; | 
|  | 623 |  | 
|  | 624 | return EvalAddr(C->getRHS()); | 
|  | 625 | } | 
|  | 626 |  | 
|  | 627 | // For implicit casts, we need to handle conversions from arrays to | 
|  | 628 | // pointer values, and implicit pointer-to-pointer conversions. | 
|  | 629 | case Stmt::ImplicitCastExprClass: { | 
|  | 630 | ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); | 
|  | 631 | Expr* SubExpr = IE->getSubExpr(); | 
|  | 632 |  | 
|  | 633 | if (SubExpr->getType()->isPointerType() || | 
| Ted Kremenek | a526c5c | 2008-01-07 19:49:32 +0000 | [diff] [blame] | 634 | SubExpr->getType()->isObjCQualifiedIdType()) | 
| Chris Lattner | fae3f1f | 2007-12-28 05:31:15 +0000 | [diff] [blame] | 635 | return EvalAddr(SubExpr); | 
|  | 636 | else | 
|  | 637 | return EvalVal(SubExpr); | 
|  | 638 | } | 
|  | 639 |  | 
|  | 640 | // For casts, we handle pointer-to-pointer conversions (which | 
|  | 641 | // is essentially a no-op from our mini-interpreter's standpoint). | 
|  | 642 | // For other casts we abort. | 
|  | 643 | case Stmt::CastExprClass: { | 
|  | 644 | CastExpr *C = cast<CastExpr>(E); | 
|  | 645 | Expr *SubExpr = C->getSubExpr(); | 
|  | 646 |  | 
|  | 647 | if (SubExpr->getType()->isPointerType()) | 
|  | 648 | return EvalAddr(SubExpr); | 
|  | 649 | else | 
|  | 650 | return NULL; | 
|  | 651 | } | 
|  | 652 |  | 
|  | 653 | // C++ casts.  For dynamic casts, static casts, and const casts, we | 
|  | 654 | // are always converting from a pointer-to-pointer, so we just blow | 
|  | 655 | // through the cast.  In the case the dynamic cast doesn't fail | 
|  | 656 | // (and return NULL), we take the conservative route and report cases | 
|  | 657 | // where we return the address of a stack variable.  For Reinterpre | 
|  | 658 | case Stmt::CXXCastExprClass: { | 
|  | 659 | CXXCastExpr *C = cast<CXXCastExpr>(E); | 
|  | 660 |  | 
|  | 661 | if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { | 
|  | 662 | Expr *S = C->getSubExpr(); | 
|  | 663 | if (S->getType()->isPointerType()) | 
|  | 664 | return EvalAddr(S); | 
|  | 665 | else | 
|  | 666 | return NULL; | 
|  | 667 | } | 
|  | 668 | else | 
|  | 669 | return EvalAddr(C->getSubExpr()); | 
|  | 670 | } | 
|  | 671 |  | 
|  | 672 | // Everything else: we simply don't reason about them. | 
|  | 673 | default: | 
|  | 674 | return NULL; | 
|  | 675 | } | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 676 | } | 
|  | 677 |  | 
|  | 678 |  | 
|  | 679 | ///  EvalVal - This function is complements EvalAddr in the mutual recursion. | 
|  | 680 | ///   See the comments for EvalAddr for more details. | 
|  | 681 | static DeclRefExpr* EvalVal(Expr *E) { | 
|  | 682 |  | 
| Ted Kremenek | e8c600f | 2007-08-28 17:02:55 +0000 | [diff] [blame] | 683 | // We should only be called for evaluating non-pointer expressions, or | 
|  | 684 | // expressions with a pointer type that are not used as references but instead | 
|  | 685 | // are l-values (e.g., DeclRefExpr with a pointer type). | 
|  | 686 |  | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 687 | // Our "symbolic interpreter" is just a dispatch off the currently | 
|  | 688 | // viewed AST node.  We then recursively traverse the AST by calling | 
|  | 689 | // EvalAddr and EvalVal appropriately. | 
|  | 690 | switch (E->getStmtClass()) { | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 691 | case Stmt::DeclRefExprClass: { | 
|  | 692 | // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking | 
|  | 693 | //  at code that refers to a variable's name.  We check if it has local | 
|  | 694 | //  storage within the function, and if so, return the expression. | 
|  | 695 | DeclRefExpr *DR = cast<DeclRefExpr>(E); | 
|  | 696 |  | 
|  | 697 | if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) | 
|  | 698 | if(V->hasLocalStorage()) return DR; | 
|  | 699 |  | 
|  | 700 | return NULL; | 
|  | 701 | } | 
|  | 702 |  | 
|  | 703 | case Stmt::ParenExprClass: | 
|  | 704 | // Ignore parentheses. | 
|  | 705 | return EvalVal(cast<ParenExpr>(E)->getSubExpr()); | 
|  | 706 |  | 
|  | 707 | case Stmt::UnaryOperatorClass: { | 
|  | 708 | // The only unary operator that make sense to handle here | 
|  | 709 | // is Deref.  All others don't resolve to a "name."  This includes | 
|  | 710 | // handling all sorts of rvalues passed to a unary operator. | 
|  | 711 | UnaryOperator *U = cast<UnaryOperator>(E); | 
|  | 712 |  | 
|  | 713 | if (U->getOpcode() == UnaryOperator::Deref) | 
|  | 714 | return EvalAddr(U->getSubExpr()); | 
|  | 715 |  | 
|  | 716 | return NULL; | 
|  | 717 | } | 
|  | 718 |  | 
|  | 719 | case Stmt::ArraySubscriptExprClass: { | 
|  | 720 | // Array subscripts are potential references to data on the stack.  We | 
|  | 721 | // retrieve the DeclRefExpr* for the array variable if it indeed | 
|  | 722 | // has local storage. | 
| Ted Kremenek | 2324512 | 2007-08-20 16:18:38 +0000 | [diff] [blame] | 723 | return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 724 | } | 
|  | 725 |  | 
|  | 726 | case Stmt::ConditionalOperatorClass: { | 
|  | 727 | // For conditional operators we need to see if either the LHS or RHS are | 
|  | 728 | // non-NULL DeclRefExpr's.  If one is non-NULL, we return it. | 
|  | 729 | ConditionalOperator *C = cast<ConditionalOperator>(E); | 
|  | 730 |  | 
| Anders Carlsson | 3907323 | 2007-11-30 19:04:31 +0000 | [diff] [blame] | 731 | // Handle the GNU extension for missing LHS. | 
|  | 732 | if (Expr *lhsExpr = C->getLHS()) | 
|  | 733 | if (DeclRefExpr *LHS = EvalVal(lhsExpr)) | 
|  | 734 | return LHS; | 
|  | 735 |  | 
|  | 736 | return EvalVal(C->getRHS()); | 
| Ted Kremenek | 06de276 | 2007-08-17 16:46:58 +0000 | [diff] [blame] | 737 | } | 
|  | 738 |  | 
|  | 739 | // Accesses to members are potential references to data on the stack. | 
|  | 740 | case Stmt::MemberExprClass: { | 
|  | 741 | MemberExpr *M = cast<MemberExpr>(E); | 
|  | 742 |  | 
|  | 743 | // Check for indirect access.  We only want direct field accesses. | 
|  | 744 | if (!M->isArrow()) | 
|  | 745 | return EvalVal(M->getBase()); | 
|  | 746 | else | 
|  | 747 | return NULL; | 
|  | 748 | } | 
|  | 749 |  | 
|  | 750 | // Everything else: we simply don't reason about them. | 
|  | 751 | default: | 
|  | 752 | return NULL; | 
|  | 753 | } | 
|  | 754 | } | 
| Ted Kremenek | 588e5eb | 2007-11-25 00:58:00 +0000 | [diff] [blame] | 755 |  | 
|  | 756 | //===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// | 
|  | 757 |  | 
|  | 758 | /// Check for comparisons of floating point operands using != and ==. | 
|  | 759 | /// Issue a warning if these are no self-comparisons, as they are not likely | 
|  | 760 | /// to do what the programmer intended. | 
|  | 761 | void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { | 
|  | 762 | bool EmitWarning = true; | 
|  | 763 |  | 
| Ted Kremenek | 4e99a5f | 2008-01-17 16:57:34 +0000 | [diff] [blame] | 764 | Expr* LeftExprSansParen = lex->IgnoreParens(); | 
| Ted Kremenek | 32e97b6 | 2008-01-17 17:55:13 +0000 | [diff] [blame] | 765 | Expr* RightExprSansParen = rex->IgnoreParens(); | 
| Ted Kremenek | 588e5eb | 2007-11-25 00:58:00 +0000 | [diff] [blame] | 766 |  | 
|  | 767 | // Special case: check for x == x (which is OK). | 
|  | 768 | // Do not emit warnings for such cases. | 
|  | 769 | if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) | 
|  | 770 | if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) | 
|  | 771 | if (DRL->getDecl() == DRR->getDecl()) | 
|  | 772 | EmitWarning = false; | 
|  | 773 |  | 
| Ted Kremenek | 1b500bb | 2007-11-29 00:59:04 +0000 | [diff] [blame] | 774 |  | 
|  | 775 | // Special case: check for comparisons against literals that can be exactly | 
|  | 776 | //  represented by APFloat.  In such cases, do not emit a warning.  This | 
|  | 777 | //  is a heuristic: often comparison against such literals are used to | 
|  | 778 | //  detect if a value in a variable has not changed.  This clearly can | 
|  | 779 | //  lead to false negatives. | 
|  | 780 | if (EmitWarning) { | 
|  | 781 | if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { | 
|  | 782 | if (FLL->isExact()) | 
|  | 783 | EmitWarning = false; | 
|  | 784 | } | 
|  | 785 | else | 
|  | 786 | if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ | 
|  | 787 | if (FLR->isExact()) | 
|  | 788 | EmitWarning = false; | 
|  | 789 | } | 
|  | 790 | } | 
|  | 791 |  | 
| Ted Kremenek | 588e5eb | 2007-11-25 00:58:00 +0000 | [diff] [blame] | 792 | // Check for comparisons with builtin types. | 
|  | 793 | if (EmitWarning) | 
|  | 794 | if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) | 
|  | 795 | if (isCallBuiltin(CL)) | 
|  | 796 | EmitWarning = false; | 
|  | 797 |  | 
|  | 798 | if (EmitWarning) | 
|  | 799 | if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) | 
|  | 800 | if (isCallBuiltin(CR)) | 
|  | 801 | EmitWarning = false; | 
|  | 802 |  | 
|  | 803 | // Emit the diagnostic. | 
|  | 804 | if (EmitWarning) | 
|  | 805 | Diag(loc, diag::warn_floatingpoint_eq, | 
|  | 806 | lex->getSourceRange(),rex->getSourceRange()); | 
|  | 807 | } |