initial support for checking format strings, patch by Ted Kremenek:
"I've coded up some support in clang to flag warnings for non-constant format strings used in calls to printf-like functions (all the functions listed in "man fprintf"). Non-constant format strings are a source of many security exploits in C/C++ programs, and I believe are currently detected by gcc using the flag -Wformat-nonliteral."
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@41003 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Sema/Sema.cpp b/Sema/Sema.cpp
index 64c18a8..76771d0 100644
--- a/Sema/Sema.cpp
+++ b/Sema/Sema.cpp
@@ -16,10 +16,26 @@
#include "clang/AST/ASTContext.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/Diagnostic.h"
+
using namespace clang;
Sema::Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup)
: PP(pp), Context(ctxt), CurFunctionDecl(0), LastInGroupList(prevInGroup) {
+
+  // Resolve, once per Sema object, the IdentifierInfo for every function that
+  // receives extra checking, and store it in the KnownFunctionIDs slot named
+  // by the matching id_* enumerator.
+  IdentifierTable &Idents = PP.getIdentifierTable();
+
+  // Slot / spelling pairs, registered in the order listed.
+  const struct {
+    unsigned ID;
+    const char *Name;
+  } KnownFunctions[] = {
+    { id_printf,    "printf"    },
+    { id_fprintf,   "fprintf"   },
+    { id_sprintf,   "sprintf"   },
+    { id_snprintf,  "snprintf"  },
+    { id_vsnprintf, "vsnprintf" },
+    { id_asprintf,  "asprintf"  },
+    { id_vasprintf, "vasprintf" },
+    { id_vfprintf,  "vfprintf"  },
+    { id_vsprintf,  "vsprintf"  },
+    { id_vprintf,   "vprintf"   }
+  };
+
+  const unsigned NumKnown = sizeof(KnownFunctions) / sizeof(KnownFunctions[0]);
+  for (unsigned Idx = 0; Idx != NumKnown; ++Idx)
+    KnownFunctionIDs[ KnownFunctions[Idx].ID ] =
+      &Idents.get(KnownFunctions[Idx].Name);
}
//===----------------------------------------------------------------------===//
diff --git a/Sema/Sema.h b/Sema/Sema.h
index b6d4797..3cf7532 100644
--- a/Sema/Sema.h
+++ b/Sema/Sema.h
@@ -68,6 +68,28 @@
/// us to associate a raw vector type with one of the OCU type names.
/// This is only necessary for issuing pretty diagnostics.
llvm::SmallVector<TypedefDecl*, 24> OCUVectorDecls;
+
+ // Enum values used as indices into KnownFunctionIDs (see below), one per
+ // function that receives extra semantic checking. The Sema constructor
+ // fills in the KnownFunctionIDs slot for each enumerator.
+ //
+ // NOTE: Sema::CheckFunctionCall treats every index <= id_vprintf as a
+ // printf-like function, so id_vprintf must stay the last printf-like entry
+ // and any other kind of function must be added after it.
+ enum {
+ id_printf,
+ id_fprintf,
+ id_sprintf,
+ id_snprintf,
+ id_vsnprintf,
+ id_asprintf,
+ id_vasprintf,
+ id_vfprintf,
+ id_vsprintf,
+ id_vprintf,
+ // Number of entries above; used as the size of KnownFunctionIDs, so it
+ // must remain the final enumerator.
+ id_num_known_functions
+ };
+
+ /// KnownFunctionIDs - The IdentifierInfo object of each known function,
+ /// indexed by the enum above. Semantic analysis uses this table to
+ /// recognize calls that get extra checking (e.g. checking format string
+ /// errors in printf calls).
+ /// This list is populated upon the creation of a Sema object.
+ IdentifierInfo* KnownFunctionIDs[ id_num_known_functions ];
+
public:
Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup);
@@ -395,7 +417,17 @@
/// a constant expression of type int with a value greater than zero. If the
/// array has an incomplete type or a valid constant size, return false,
/// otherwise emit a diagnostic and return true.
- bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc);
+ bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc);
+
+ //===--------------------------------------------------------------------===//
+ // Extra semantic analysis beyond the C type system
+ //===--------------------------------------------------------------------===//
+ private:
+
+ /// CheckFunctionCall - Check a direct call to the function declared by
+ /// FDecl. If FDecl's identifier is one of KnownFunctionIDs, the matching
+ /// specialized check (currently only printf-style format checking) is run;
+ /// otherwise nothing is done. Fn is the callee expression, Args the
+ /// argument expressions and NumArgsInCall the number of entries in Args.
+ void CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
+ Expr** Args, unsigned NumArgsInCall);
+
+ /// CheckPrintfArguments - Check the format string of a printf-like call.
+ /// format_idx is the zero-based index in Args of the format string
+ /// argument. Emits diag::warn_printf_not_string_constant when that
+ /// argument is not a StringLiteral.
+ void CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
+ Expr** Args, unsigned NumArgsInCall);
};
diff --git a/Sema/SemaChecking.cpp b/Sema/SemaChecking.cpp
new file mode 100644
index 0000000..883cbb6
--- /dev/null
+++ b/Sema/SemaChecking.cpp
@@ -0,0 +1,90 @@
+//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Ted Kremenek and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements extra semantic analysis beyond what is enforced
+// by the C type system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sema.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace clang;
+
+/// CheckFunctionCall - Check a direct function call for various correctness
+/// and safety properties not strictly enforced by the C type system.
+///
+/// Fn is the callee expression, FDecl the declaration of the function being
+/// called, Args the argument expressions of the call and NumArgsInCall the
+/// number of entries in Args.  The callee's identifier is looked up in
+/// KnownFunctionIDs; calls to functions that are not in that table are left
+/// unchecked.
+void
+Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
+                        Expr** Args, unsigned NumArgsInCall) {
+
+  // Get the IdentifierInfo* for the called function.
+  IdentifierInfo *FnInfo = FDecl->getIdentifier();
+
+  // Search the KnownFunctionIDs for the identifier.  Identifiers are compared
+  // by pointer: the table holds the IdentifierInfo objects handed out by the
+  // preprocessor's identifier table.
+  unsigned i = 0, e = id_num_known_functions;
+  for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
+  if( i == e ) return;
+
+  // Printf checking.  All printf-like entries occupy the enum values up to
+  // and including id_vprintf.
+  if (i <= id_vprintf) {
+    // Retrieve the (zero-based) index of the format string parameter.
+    unsigned format_idx = 0;
+    switch (i) {
+      default: assert(false && "No format string argument index.");
+      // printf(format, ...)
+      case id_printf:    format_idx = 0; break;
+      // fprintf(stream, format, ...)
+      case id_fprintf:   format_idx = 1; break;
+      // sprintf(str, format, ...)
+      case id_sprintf:   format_idx = 1; break;
+      // snprintf(str, size, format, ...)
+      case id_snprintf:  format_idx = 2; break;
+      // vsnprintf(str, size, format, ap)
+      case id_vsnprintf: format_idx = 2; break;
+      // asprintf(ret, format, ...)
+      case id_asprintf:  format_idx = 1; break;
+      // vasprintf(ret, format, ap)
+      case id_vasprintf: format_idx = 1; break;
+      // vfprintf(stream, format, ap)
+      case id_vfprintf:  format_idx = 1; break;
+      // vsprintf(str, format, ap)
+      case id_vsprintf:  format_idx = 1; break;
+      // vprintf(format, ap): the format string is the FIRST argument; index 1
+      // would be the va_list.
+      case id_vprintf:   format_idx = 0; break;
+    }
+    CheckPrintfArguments(Fn, FDecl, format_idx, Args, NumArgsInCall);
+  }
+}
+
+/// CheckPrintfArguments - Check calls to printf (and similar functions) for
+/// correct use of format strings.  Improper format strings to functions in
+/// the printf family can be the source of bizarre bugs and very serious
+/// security holes.  A good source of information is available in the following
+/// paper (which includes additional references):
+///
+///  FormatGuard: Automatic Protection From printf Format String
+///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
+///
+/// Fn is the callee expression (its source range is attached to the
+/// diagnostic), format_idx is the zero-based index in Args of the format
+/// string argument, and NumArgsInCall is the number of expressions in Args.
+/// FDecl is currently unused.  Calls that do not supply an argument at
+/// format_idx are skipped without a diagnostic.
+void
+Sema::CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
+                           Expr** Args, unsigned NumArgsInCall) {
+
+  // The call may supply fewer arguments than the format index implies (for
+  // example a printf-named function declared without a prototype, or with a
+  // different signature).  There is then no format argument to inspect, and
+  // indexing Args would read past its end, so bail out instead of asserting
+  // on user code.
+  if (format_idx >= NumArgsInCall)
+    return;
+
+  // CHECK: format string is not a string literal.
+  //
+  // Dynamically generated format strings are difficult to automatically
+  // vet at compile time.  Requiring that format strings are string literals
+  // (1) permits the checking of format strings by the compiler and thereby
+  // (2) can practically remove the source of many format string exploits.
+
+  StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]);
+
+  if ( FExpr == NULL )
+    Diag( Args[format_idx]->getLocStart(),
+          diag::warn_printf_not_string_constant, Fn->getSourceRange() );
+}
\ No newline at end of file
diff --git a/Sema/SemaExpr.cpp b/Sema/SemaExpr.cpp
index 8a3576a..6b871b4 100644
--- a/Sema/SemaExpr.cpp
+++ b/Sema/SemaExpr.cpp
@@ -22,6 +22,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
using namespace clang;
/// ParseStringLiteral - The specified tokens were lexed as pasted string
@@ -555,6 +556,13 @@
if (NumArgsInCall != NumArgsInProto && !proto->isVariadic())
return true;
}
+
+ // Do special checking on direct calls to functions.
+ if (ImplicitCastExpr *IcExpr = dyn_cast<ImplicitCastExpr>(Fn))
+ if (DeclRefExpr *DRExpr = dyn_cast<DeclRefExpr>(IcExpr->getSubExpr()))
+ if (FunctionDecl *FDecl = dyn_cast<FunctionDecl>(DRExpr->getDecl()))
+ CheckFunctionCall(Fn, FDecl, Args, NumArgsInCall);
+
return new CallExpr(Fn, Args, NumArgsInCall, resultType, RParenLoc);
}