Chris Lattner | 2e64c07 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 1 | //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by Ted Kremenek and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements extra semantic analysis beyond what is enforced |
| 11 | // by the C type system. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "Sema.h" |
| 16 | #include "clang/AST/ASTContext.h" |
| 17 | #include "clang/AST/Decl.h" |
| 18 | #include "clang/AST/Expr.h" |
| 19 | #include "clang/Lex/Preprocessor.h" |
| 20 | #include "clang/Lex/LiteralSupport.h" |
| 21 | #include "clang/Basic/SourceManager.h" |
| 22 | #include "clang/Basic/Diagnostic.h" |
| 23 | #include "clang/Basic/LangOptions.h" |
| 24 | #include "clang/Basic/TargetInfo.h" |
| 25 | #include "llvm/ADT/SmallString.h" |
| 26 | #include "llvm/ADT/StringExtras.h" |
| 27 | using namespace clang; |
| 28 | |
| 29 | /// CheckFunctionCall - Check a direct function call for various correctness |
| 30 | /// and safety properties not strictly enforced by the C type system. |
| 31 | void |
| 32 | Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl, |
| 33 | Expr** Args, unsigned NumArgsInCall) { |
| 34 | |
| 35 | // Get the IdentifierInfo* for the called function. |
| 36 | IdentifierInfo *FnInfo = FDecl->getIdentifier(); |
| 37 | |
| 38 | // Search the KnownFunctionIDs for the identifier. |
| 39 | unsigned i = 0, e = id_num_known_functions; |
| 40 | for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } |
| 41 | if( i == e ) return; |
| 42 | |
| 43 | // Printf checking. |
| 44 | if (i <= id_vprintf) { |
| 45 | // Retrieve the index of the format string parameter. |
| 46 | unsigned format_idx = 0; |
| 47 | switch (i) { |
| 48 | default: assert(false && "No format string argument index."); |
| 49 | case id_printf: format_idx = 0; break; |
| 50 | case id_fprintf: format_idx = 1; break; |
| 51 | case id_sprintf: format_idx = 1; break; |
| 52 | case id_snprintf: format_idx = 2; break; |
| 53 | case id_vsnprintf: format_idx = 2; break; |
| 54 | case id_asprintf: format_idx = 1; break; |
| 55 | case id_vasprintf: format_idx = 1; break; |
| 56 | case id_vfprintf: format_idx = 1; break; |
| 57 | case id_vsprintf: format_idx = 1; break; |
| 58 | case id_vprintf: format_idx = 1; break; |
| 59 | } |
Ted Kremenek | 3059654 | 2007-08-10 21:21:05 +0000 | [diff] [blame] | 60 | CheckPrintfArguments(Fn, i, FDecl, format_idx, Args, NumArgsInCall); |
Chris Lattner | 2e64c07 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 61 | } |
| 62 | } |
| 63 | |
| 64 | /// CheckPrintfArguments - Check calls to printf (and similar functions) for |
| 65 | /// correct use of format strings. Improper format strings to functions in |
| 66 | /// the printf family can be the source of bizarre bugs and very serious |
| 67 | /// security holes. A good source of information is available in the following |
| 68 | /// paper (which includes additional references): |
| 69 | /// |
| 70 | /// FormatGuard: Automatic Protection From printf Format String |
| 71 | /// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. |
| 72 | void |
Ted Kremenek | 3059654 | 2007-08-10 21:21:05 +0000 | [diff] [blame] | 73 | Sema::CheckPrintfArguments(Expr *Fn, unsigned id_idx, FunctionDecl *FDecl, |
| 74 | unsigned format_idx, Expr** Args, |
| 75 | unsigned NumArgsInCall) { |
Chris Lattner | 2e64c07 | 2007-08-10 20:18:51 +0000 | [diff] [blame] | 76 | |
| 77 | assert( format_idx < NumArgsInCall ); |
| 78 | |
| 79 | // CHECK: format string is not a string literal. |
| 80 | // |
| 81 | // Dynamically generated format strings are difficult to automatically |
| 82 | // vet at compile time. Requiring that format strings are string literals |
| 83 | // (1) permits the checking of format strings by the compiler and thereby |
| 84 | // (2) can practically remove the source of many format string exploits. |
| 85 | |
| 86 | StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]); |
| 87 | |
| 88 | if ( FExpr == NULL ) |
| 89 | Diag( Args[format_idx]->getLocStart(), |
| 90 | diag::warn_printf_not_string_constant, Fn->getSourceRange() ); |
| 91 | } |