Improve static checks for sprintf and __builtin___sprintf_chk
Implement a pessimistic evaluator of the minimal required size for a buffer
based on the format string, and couple that with the fortified version to emit a
warning when the buffer size is lower than the lower bound computed from the
format string.
Differential Revision: https://reviews.llvm.org/D71566
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 488f747..b49c222 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -741,6 +741,12 @@
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;
+def warn_fortify_source_format_overflow : Warning<
+ "'%0' will always overflow; destination buffer has size %1,"
+ " but format string expands to at least %2">,
+ InGroup<FortifySource>;
+
+
/// main()
// static main() is not an error in C, just in C++.
def warn_static_main : Warning<"'main' should not be declared static">,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 186f2b5..ab63120 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -390,13 +390,194 @@
return false;
}
+namespace {
+
+class EstimateSizeFormatHandler
+ : public analyze_format_string::FormatStringHandler {
+ size_t Size;
+
+public:
+ EstimateSizeFormatHandler(StringRef Format)
+ : Size(std::min(Format.find(0), Format.size()) +
+ 1 /* null byte always written by sprintf */) {}
+
+ bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+ const char *, unsigned SpecifierLen) override {
+
+ const size_t FieldWidth = computeFieldWidth(FS);
+ const size_t Precision = computePrecision(FS);
+
+ // The actual format.
+ switch (FS.getConversionSpecifier().getKind()) {
+ // Just a char.
+ case analyze_format_string::ConversionSpecifier::cArg:
+ case analyze_format_string::ConversionSpecifier::CArg:
+ Size += std::max(FieldWidth, (size_t)1);
+ break;
+ // Just an integer.
+ case analyze_format_string::ConversionSpecifier::dArg:
+ case analyze_format_string::ConversionSpecifier::DArg:
+ case analyze_format_string::ConversionSpecifier::iArg:
+ case analyze_format_string::ConversionSpecifier::oArg:
+ case analyze_format_string::ConversionSpecifier::OArg:
+ case analyze_format_string::ConversionSpecifier::uArg:
+ case analyze_format_string::ConversionSpecifier::UArg:
+ case analyze_format_string::ConversionSpecifier::xArg:
+ case analyze_format_string::ConversionSpecifier::XArg:
+ Size += std::max(FieldWidth, Precision);
+ break;
+
+ // %g style conversion switches between %f or %e style dynamically.
+ // %f always takes less space, so default to it.
+ case analyze_format_string::ConversionSpecifier::gArg:
+ case analyze_format_string::ConversionSpecifier::GArg:
+
+ // Floating point number in the form '[+]ddd.ddd'.
+ case analyze_format_string::ConversionSpecifier::fArg:
+ case analyze_format_string::ConversionSpecifier::FArg:
+ Size += std::max(FieldWidth, 1 /* integer part */ +
+ (Precision ? 1 + Precision
+ : 0) /* period + decimal */);
+ break;
+
+ // Floating point number in the form '[-]d.ddde[+-]dd'.
+ case analyze_format_string::ConversionSpecifier::eArg:
+ case analyze_format_string::ConversionSpecifier::EArg:
+ Size +=
+ std::max(FieldWidth,
+ 1 /* integer part */ +
+ (Precision ? 1 + Precision : 0) /* period + decimal */ +
+ 1 /* e or E letter */ + 2 /* exponent */);
+ break;
+
+ // Floating point number in the form '[-]0xh.hhhhp±dd'.
+ case analyze_format_string::ConversionSpecifier::aArg:
+ case analyze_format_string::ConversionSpecifier::AArg:
+ Size +=
+ std::max(FieldWidth,
+ 2 /* 0x */ + 1 /* integer part */ +
+ (Precision ? 1 + Precision : 0) /* period + decimal */ +
+ 1 /* p or P letter */ + 1 /* + or - */ + 1 /* value */);
+ break;
+
+ // Just a string.
+ case analyze_format_string::ConversionSpecifier::sArg:
+ case analyze_format_string::ConversionSpecifier::SArg:
+ Size += FieldWidth;
+ break;
+
+ // Just a pointer in the form '0xddd'.
+ case analyze_format_string::ConversionSpecifier::pArg:
+ Size += std::max(FieldWidth, 2 /* leading 0x */ + Precision);
+ break;
+
+ // A plain percent.
+ case analyze_format_string::ConversionSpecifier::PercentArg:
+ Size += 1;
+ break;
+
+ default:
+ break;
+ }
+
+ Size += FS.hasPlusPrefix() || FS.hasSpacePrefix();
+
+ if (FS.hasAlternativeForm()) {
+ switch (FS.getConversionSpecifier().getKind()) {
+ default:
+ break;
+ // Force a leading '0'.
+ case analyze_format_string::ConversionSpecifier::oArg:
+ Size += 1;
+ break;
+ // Force a leading '0x'.
+ case analyze_format_string::ConversionSpecifier::xArg:
+ case analyze_format_string::ConversionSpecifier::XArg:
+ Size += 2;
+ break;
+ // Force a period '.' before decimal, even if precision is 0.
+ case analyze_format_string::ConversionSpecifier::aArg:
+ case analyze_format_string::ConversionSpecifier::AArg:
+ case analyze_format_string::ConversionSpecifier::eArg:
+ case analyze_format_string::ConversionSpecifier::EArg:
+ case analyze_format_string::ConversionSpecifier::fArg:
+ case analyze_format_string::ConversionSpecifier::FArg:
+ case analyze_format_string::ConversionSpecifier::gArg:
+ case analyze_format_string::ConversionSpecifier::GArg:
+ Size += (Precision ? 0 : 1);
+ break;
+ }
+ }
+ assert(SpecifierLen <= Size && "no underflow");
+ Size -= SpecifierLen;
+ return true;
+ }
+
+ size_t getSizeLowerBound() const { return Size; }
+
+private:
+ static size_t computeFieldWidth(const analyze_printf::PrintfSpecifier &FS) {
+ const analyze_format_string::OptionalAmount &FW = FS.getFieldWidth();
+ size_t FieldWidth = 0;
+ if (FW.getHowSpecified() == analyze_format_string::OptionalAmount::Constant)
+ FieldWidth = FW.getConstantAmount();
+ return FieldWidth;
+ }
+
+ static size_t computePrecision(const analyze_printf::PrintfSpecifier &FS) {
+ const analyze_format_string::OptionalAmount &FW = FS.getPrecision();
+ size_t Precision = 0;
+
+ // See man 3 printf for default precision value based on the specifier.
+ switch (FW.getHowSpecified()) {
+ case analyze_format_string::OptionalAmount::NotSpecified:
+ switch (FS.getConversionSpecifier().getKind()) {
+ default:
+ break;
+ case analyze_format_string::ConversionSpecifier::dArg: // %d
+ case analyze_format_string::ConversionSpecifier::DArg: // %D
+ case analyze_format_string::ConversionSpecifier::iArg: // %i
+ Precision = 1;
+ break;
+ case analyze_format_string::ConversionSpecifier::oArg: // %d
+ case analyze_format_string::ConversionSpecifier::OArg: // %D
+ case analyze_format_string::ConversionSpecifier::uArg: // %d
+ case analyze_format_string::ConversionSpecifier::UArg: // %D
+ case analyze_format_string::ConversionSpecifier::xArg: // %d
+ case analyze_format_string::ConversionSpecifier::XArg: // %D
+ Precision = 1;
+ break;
+ case analyze_format_string::ConversionSpecifier::fArg: // %f
+ case analyze_format_string::ConversionSpecifier::FArg: // %F
+ case analyze_format_string::ConversionSpecifier::eArg: // %e
+ case analyze_format_string::ConversionSpecifier::EArg: // %E
+ case analyze_format_string::ConversionSpecifier::gArg: // %g
+ case analyze_format_string::ConversionSpecifier::GArg: // %G
+ Precision = 6;
+ break;
+ case analyze_format_string::ConversionSpecifier::pArg: // %d
+ Precision = 1;
+ break;
+ }
+ break;
+ case analyze_format_string::OptionalAmount::Constant:
+ Precision = FW.getConstantAmount();
+ break;
+ default:
+ break;
+ }
+ return Precision;
+ }
+};
+
+} // namespace
+
/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
/// __builtin_*_chk function, then use the object size argument specified in the
/// source. Otherwise, infer the object size using __builtin_object_size.
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
// FIXME: There are some more useful checks we could be doing here:
- // - Analyze the format string of sprintf to see how much of buffer is used.
// - Evaluate strlen of strcpy arguments, use as object size.
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
@@ -407,12 +588,55 @@
if (!BuiltinID)
return;
+ const TargetInfo &TI = getASTContext().getTargetInfo();
+ unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
+
unsigned DiagID = 0;
bool IsChkVariant = false;
+ Optional<llvm::APSInt> UsedSize;
unsigned SizeIndex, ObjectIndex;
switch (BuiltinID) {
default:
return;
+ case Builtin::BIsprintf:
+ case Builtin::BI__builtin___sprintf_chk: {
+ size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
+ auto *FormatExpr = TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
+
+ if (auto *Format = dyn_cast<StringLiteral>(FormatExpr)) {
+
+ if (!Format->isAscii() && !Format->isUTF8())
+ return;
+
+ StringRef FormatStrRef = Format->getString();
+ EstimateSizeFormatHandler H(FormatStrRef);
+ const char *FormatBytes = FormatStrRef.data();
+ const ConstantArrayType *T =
+ Context.getAsConstantArrayType(Format->getType());
+ assert(T && "String literal not of constant array type!");
+ size_t TypeSize = T->getSize().getZExtValue();
+
+ // In case there's a null byte somewhere.
+ size_t StrLen =
+ std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
+ if (!analyze_format_string::ParsePrintfString(
+ H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
+ Context.getTargetInfo(), false)) {
+ DiagID = diag::warn_fortify_source_format_overflow;
+ UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
+ .extOrTrunc(SizeTypeWidth);
+ if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
+ IsChkVariant = true;
+ ObjectIndex = 2;
+ } else {
+ IsChkVariant = false;
+ ObjectIndex = 0;
+ }
+ break;
+ }
+ }
+ return;
+ }
case Builtin::BI__builtin___memcpy_chk:
case Builtin::BI__builtin___memmove_chk:
case Builtin::BI__builtin___memset_chk:
@@ -505,19 +729,19 @@
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
return;
// Get the object size in the target's size_t width.
- const TargetInfo &TI = getASTContext().getTargetInfo();
- unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
}
// Evaluate the number of bytes of the object that this call will use.
- Expr::EvalResult Result;
- Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
- if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
- return;
- llvm::APSInt UsedSize = Result.Val.getInt();
+ if (!UsedSize) {
+ Expr::EvalResult Result;
+ Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
+ if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
+ return;
+ UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
+ }
- if (UsedSize.ule(ObjectSize))
+ if (UsedSize.getValue().ule(ObjectSize))
return;
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@@ -533,7 +757,7 @@
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
PDiag(DiagID)
<< FunctionName << ObjectSize.toString(/*Radix=*/10)
- << UsedSize.toString(/*Radix=*/10));
+ << UsedSize.getValue().toString(/*Radix=*/10));
}
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index d9c21c0..0f93a68 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -11,6 +11,8 @@
extern "C" {
#endif
+extern int sprintf(char *str, const char *format, ...);
+
#if defined(USE_PASS_OBJECT_SIZE)
void *memcpy(void *dst, const void *src, size_t c);
static void *memcpy(void *dst __attribute__((pass_object_size(1))), const void *src, size_t c) __attribute__((overloadable)) __asm__("merp");
@@ -96,6 +98,91 @@
__builtin_vsnprintf(buf, 11, "merp", list); // expected-warning {{'vsnprintf' size argument is too large; destination buffer has size 10, but size argument is 11}}
}
+void call_sprintf_chk(char *buf) {
+ __builtin___sprintf_chk(buf, 1, 6, "hell\n");
+ __builtin___sprintf_chk(buf, 1, 5, "hell\n"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
+ __builtin___sprintf_chk(buf, 1, 6, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
+ __builtin___sprintf_chk(buf, 1, 2, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
+ // expected-warning@-1 {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 5}}
+ __builtin___sprintf_chk(buf, 1, 6, "hello");
+ __builtin___sprintf_chk(buf, 1, 5, "hello"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
+ __builtin___sprintf_chk(buf, 1, 2, "%c", '9');
+ __builtin___sprintf_chk(buf, 1, 1, "%c", '9'); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%d", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%i", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%o", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%o", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%u", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%u", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%x", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%X", 9);
+ __builtin___sprintf_chk(buf, 1, 1, "%X", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%hhd", (char)9);
+ __builtin___sprintf_chk(buf, 1, 1, "%hhd", (char)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%hd", (short)9);
+ __builtin___sprintf_chk(buf, 1, 1, "%hd", (short)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%ld", 9l);
+ __builtin___sprintf_chk(buf, 1, 1, "%ld", 9l); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%lld", 9ll);
+ __builtin___sprintf_chk(buf, 1, 1, "%lld", 9ll); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 2, "%%");
+ __builtin___sprintf_chk(buf, 1, 1, "%%"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 1, but format string expands to at least 2}}
+ __builtin___sprintf_chk(buf, 1, 4, "%#x", 9);
+ __builtin___sprintf_chk(buf, 1, 3, "%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+ __builtin___sprintf_chk(buf, 1, 4, "%p", (void *)9);
+ __builtin___sprintf_chk(buf, 1, 3, "%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 3, but format string expands to at least 4}}
+ __builtin___sprintf_chk(buf, 1, 3, "%+d", 9);
+ __builtin___sprintf_chk(buf, 1, 2, "%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
+ __builtin___sprintf_chk(buf, 1, 3, "% i", 9);
+ __builtin___sprintf_chk(buf, 1, 2, "% i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 2, but format string expands to at least 3}}
+ __builtin___sprintf_chk(buf, 1, 6, "%5d", 9);
+ __builtin___sprintf_chk(buf, 1, 5, "%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 5, but format string expands to at least 6}}
+ __builtin___sprintf_chk(buf, 1, 9, "%f", 9.f);
+ __builtin___sprintf_chk(buf, 1, 8, "%f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 8, but format string expands to at least 9}}
+ __builtin___sprintf_chk(buf, 1, 9, "%Lf", (long double)9.);
+ __builtin___sprintf_chk(buf, 1, 8, "%Lf", (long double)9.); // expected-warning {{'sprintf' will always overflow; destination buffer has size 8, but format string expands to at least 9}}
+ __builtin___sprintf_chk(buf, 1, 10, "%+f", 9.f);
+ __builtin___sprintf_chk(buf, 1, 9, "%+f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 9, but format string expands to at least 10}}
+ __builtin___sprintf_chk(buf, 1, 12, "%e", 9.f);
+ __builtin___sprintf_chk(buf, 1, 11, "%e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 11, but format string expands to at least 12}}
+}
+
+void call_sprintf() {
+ char buf[6];
+ sprintf(buf, "hell\0 boy"); // expected-warning {{format string contains '\0' within the string body}}
+ sprintf(buf, "hello b\0y"); // expected-warning {{format string contains '\0' within the string body}}
+ // expected-warning@-1 {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}}
+ sprintf(buf, "hello");
+ sprintf(buf, "hello!"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "1234%%");
+ sprintf(buf, "12345%%"); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "1234%c", '9');
+ sprintf(buf, "12345%c", '9'); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "1234%d", 9);
+ sprintf(buf, "12345%d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "1234%lld", 9ll);
+ sprintf(buf, "12345%lld", 9ll); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "12%#x", 9);
+ sprintf(buf, "123%#x", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "12%p", (void *)9);
+ sprintf(buf, "123%p", (void *)9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "123%+d", 9);
+ sprintf(buf, "1234%+d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "123% i", 9);
+ sprintf(buf, "1234% i", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "%5d", 9);
+ sprintf(buf, "1%5d", 9); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "%.3f", 9.f);
+ sprintf(buf, "5%.3f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "%+.2f", 9.f);
+ sprintf(buf, "%+.3f", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 7}}
+ sprintf(buf, "%.0e", 9.f);
+ sprintf(buf, "5%.1e", 9.f); // expected-warning {{'sprintf' will always overflow; destination buffer has size 6, but format string expands to at least 8}}
+}
+
#ifdef __cplusplus
template <class> struct S {
void mf() const {