Add most of the boilerplate support for scanf format string checking. This includes
handling the parsing of scanf format strings and hooking the checking into Sema.
Most of this checking logic piggybacks on what was already there for checking printf format
strings, but the checking logic has been refactored to support both.
What is left to be done is argument type checking for scanf format strings and, of course,
fixing the usual tail of bugs that will follow.
llvm-svn: 108500
diff --git a/clang/lib/Analysis/PrintfFormatString.cpp b/clang/lib/Analysis/PrintfFormatString.cpp
index 558d38a..584fc12 100644
--- a/clang/lib/Analysis/PrintfFormatString.cpp
+++ b/clang/lib/Analysis/PrintfFormatString.cpp
@@ -1,4 +1,4 @@
-//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
+//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,141 +12,28 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/Type.h"
-#include "llvm/Support/raw_ostream.h"
+#include "clang/Analysis/Analyses/FormatString.h"
+#include "FormatStringParsing.h"
-using clang::analyze_printf::ArgTypeResult;
-using clang::analyze_printf::FormatSpecifier;
-using clang::analyze_printf::FormatStringHandler;
-using clang::analyze_printf::OptionalAmount;
-using clang::analyze_printf::PositionContext;
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
using clang::analyze_printf::ConversionSpecifier;
-using clang::analyze_printf::LengthModifier;
+using clang::analyze_printf::PrintfSpecifier;
using namespace clang;
-namespace {
-class FormatSpecifierResult {
- FormatSpecifier FS;
- const char *Start;
- bool Stop;
-public:
- FormatSpecifierResult(bool stop = false)
- : Start(0), Stop(stop) {}
- FormatSpecifierResult(const char *start,
- const FormatSpecifier &fs)
- : FS(fs), Start(start), Stop(false) {}
-
- const char *getStart() const { return Start; }
- bool shouldStop() const { return Stop; }
- bool hasValue() const { return Start != 0; }
- const FormatSpecifier &getValue() const {
- assert(hasValue());
- return FS;
- }
- const FormatSpecifier &getValue() { return FS; }
-};
-} // end anonymous namespace
-
-template <typename T>
-class UpdateOnReturn {
- T &ValueToUpdate;
- const T &ValueToCopy;
-public:
- UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
- : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}
-
- ~UpdateOnReturn() {
- ValueToUpdate = ValueToCopy;
- }
-};
+typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
+ PrintfSpecifierResult;
//===----------------------------------------------------------------------===//
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
- const char *I = Beg;
- UpdateOnReturn <const char*> UpdateBeg(Beg, I);
+using analyze_format_string::ParseNonPositionAmount;
- unsigned accumulator = 0;
- bool hasDigits = false;
-
- for ( ; I != E; ++I) {
- char c = *I;
- if (c >= '0' && c <= '9') {
- hasDigits = true;
- accumulator = (accumulator * 10) + (c - '0');
- continue;
- }
-
- if (hasDigits)
- return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
- false);
-
- break;
- }
-
- return OptionalAmount();
-}
-
-static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
- unsigned &argIndex) {
- if (*Beg == '*') {
- ++Beg;
- return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
- }
-
- return ParseAmount(Beg, E);
-}
-
-static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
- const char *Start,
- const char *&Beg, const char *E,
- PositionContext p) {
- if (*Beg == '*') {
- const char *I = Beg + 1;
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- if (I== E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return OptionalAmount(false);
- }
-
- assert(Amt.getHowSpecified() == OptionalAmount::Constant);
-
- if (*I == '$') {
- // Handle positional arguments
-
- // Special case: '*0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Beg, I - Beg + 1);
- return OptionalAmount(false);
- }
-
- const char *Tmp = Beg;
- Beg = ++I;
-
- return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
- Tmp, 0, true);
- }
-
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- return ParseAmount(Beg, E);
-}
-
-static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
+static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
const char *Start, const char *&Beg, const char *E,
unsigned *argIndex) {
if (argIndex) {
@@ -154,7 +41,7 @@
}
else {
const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::PrecisionPos);
+ analyze_format_string::PrecisionPos);
if (Amt.isInvalid())
return true;
FS.setPrecision(Amt);
@@ -162,57 +49,7 @@
return false;
}
-static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
- const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex) {
- // FIXME: Support negative field widths.
- if (argIndex) {
- FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
- }
- else {
- const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::FieldWidthPos);
- if (Amt.isInvalid())
- return true;
- FS.setFieldWidth(Amt);
- }
- return false;
-}
-
-static bool ParseArgPosition(FormatStringHandler &H,
- FormatSpecifier &FS, const char *Start,
- const char *&Beg, const char *E) {
-
- using namespace clang::analyze_printf;
- const char *I = Beg;
-
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (I == E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return true;
- }
-
- if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
- // Special case: '%0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Start, I - Start);
- return true;
- }
-
- FS.setArgIndex(Amt.getConstantAmount() - 1);
- FS.setUsesPositionalArg();
- // Update the caller's pointer if we decided to consume
- // these characters.
- Beg = I;
- return false;
- }
-
- return false;
-}
-
-static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
+static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
const char *&Beg,
const char *E,
unsigned &argIndex) {
@@ -243,17 +80,17 @@
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
- FormatSpecifier FS;
+ PrintfSpecifier FS;
if (ParseArgPosition(H, FS, Start, I, E))
return true;
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -274,7 +111,7 @@
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -285,7 +122,7 @@
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -293,7 +130,7 @@
if (*I == '.') {
++I;
if (I == E) {
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -303,39 +140,15 @@
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
}
// Look for the length modifier.
- LengthModifier::Kind lmKind = LengthModifier::None;
- const char *lmPosition = I;
- switch (*I) {
- default:
- break;
- case 'h':
- ++I;
- lmKind = (I != E && *I == 'h') ?
- ++I, LengthModifier::AsChar : LengthModifier::AsShort;
- break;
- case 'l':
- ++I;
- lmKind = (I != E && *I == 'l') ?
- ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
- break;
- case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
- case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
- case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
- case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
- case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
- }
- LengthModifier lm(lmPosition, lmKind);
- FS.setLengthModifier(lm);
-
- if (I == E) {
+ if (ParseLengthModifier(FS, I, E) && I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -386,19 +199,20 @@
if (k == ConversionSpecifier::InvalidSpecifier) {
// Assume the conversion takes one argument.
- return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
}
- return FormatSpecifierResult(Start, FS);
+ return PrintfSpecifierResult(Start, FS);
}
-bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
- const char *I, const char *E) {
+bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
+ const char *I,
+ const char *E) {
unsigned argIndex = 0;
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
- const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
+ const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
@@ -408,7 +222,7 @@
if (!FSR.hasValue())
continue;
// We have a format specifier. Pass it to the callback.
- if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
+ if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
I - FSR.getStart()))
return true;
}
@@ -416,129 +230,6 @@
return false;
}
-FormatStringHandler::~FormatStringHandler() {}
-
-//===----------------------------------------------------------------------===//
-// Methods on ArgTypeResult.
-//===----------------------------------------------------------------------===//
-
-bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
- switch (K) {
- case InvalidTy:
- assert(false && "ArgTypeResult must be valid");
- return true;
-
- case UnknownTy:
- return true;
-
- case SpecificTy: {
- argTy = C.getCanonicalType(argTy).getUnqualifiedType();
- if (T == argTy)
- return true;
- if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- default:
- break;
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return T == C.UnsignedCharTy;
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- return T == C.SignedCharTy;
- case BuiltinType::Short:
- return T == C.UnsignedShortTy;
- case BuiltinType::UShort:
- return T == C.ShortTy;
- case BuiltinType::Int:
- return T == C.UnsignedIntTy;
- case BuiltinType::UInt:
- return T == C.IntTy;
- case BuiltinType::Long:
- return T == C.UnsignedLongTy;
- case BuiltinType::ULong:
- return T == C.LongTy;
- case BuiltinType::LongLong:
- return T == C.UnsignedLongLongTy;
- case BuiltinType::ULongLong:
- return T == C.LongLongTy;
- }
- return false;
- }
-
- case CStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy = PT->getPointeeType();
- if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- case BuiltinType::Void:
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return true;
- default:
- break;
- }
-
- return false;
- }
-
- case WCStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy =
- C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
- return pointeeTy == C.getWCharType();
- }
-
- case CPointerTy:
- return argTy->getAs<PointerType>() != NULL ||
- argTy->getAs<ObjCObjectPointerType>() != NULL;
-
- case ObjCPointerTy:
- return argTy->getAs<ObjCObjectPointerType>() != NULL;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return false;
-}
-
-QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
- switch (K) {
- case InvalidTy:
- assert(false && "No representative type for Invalid ArgTypeResult");
- // Fall-through.
- case UnknownTy:
- return QualType();
- case SpecificTy:
- return T;
- case CStrTy:
- return C.getPointerType(C.CharTy);
- case WCStrTy:
- return C.getPointerType(C.getWCharType());
- case ObjCPointerTy:
- return C.ObjCBuiltinIdTy;
- case CPointerTy:
- return C.VoidPtrTy;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return QualType();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
- return Ctx.IntTy;
-}
-
//===----------------------------------------------------------------------===//
// Methods on ConversionSpecifier.
//===----------------------------------------------------------------------===//
@@ -579,63 +270,10 @@
}
//===----------------------------------------------------------------------===//
-// Methods on LengthModifier.
+// Methods on PrintfSpecifier.
//===----------------------------------------------------------------------===//
-const char *LengthModifier::toString() const {
- switch (kind) {
- case AsChar:
- return "hh";
- case AsShort:
- return "h";
- case AsLong: // or AsWideChar
- return "l";
- case AsLongLong:
- return "ll";
- case AsIntMax:
- return "j";
- case AsSizeT:
- return "z";
- case AsPtrDiff:
- return "t";
- case AsLongDouble:
- return "L";
- case None:
- return "";
- }
- return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-void OptionalAmount::toString(llvm::raw_ostream &os) const {
- switch (hs) {
- case Invalid:
- case NotSpecified:
- return;
- case Arg:
- if (UsesDotPrefix)
- os << ".";
- if (usesPositionalArg())
- os << "*" << getPositionalArgIndex() << "$";
- else
- os << "*";
- break;
- case Constant:
- if (UsesDotPrefix)
- os << ".";
- os << amt;
- break;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on FormatSpecifier.
-//===----------------------------------------------------------------------===//
-
-ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
+ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
if (!CS.consumesDataArgument())
return ArgTypeResult::Invalid();
@@ -702,7 +340,7 @@
return ArgTypeResult();
}
-bool FormatSpecifier::fixType(QualType QT) {
+bool PrintfSpecifier::fixType(QualType QT) {
// Handle strings first (char *, wchar_t *)
if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
CS.setKind(ConversionSpecifier::CStrArg);
@@ -783,9 +421,9 @@
return true;
}
-void FormatSpecifier::toString(llvm::raw_ostream &os) const {
+void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
// Whilst some features have no defined order, we are using the order
- // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
+ // appearing in the C99 standard (ISO/IEC 9899:1999 (E) §7.19.6.1)
os << "%";
// Positional args
@@ -810,7 +448,7 @@
os << CS.toString();
}
-bool FormatSpecifier::hasValidPlusPrefix() const {
+bool PrintfSpecifier::hasValidPlusPrefix() const {
if (!HasPlusPrefix)
return true;
@@ -833,7 +471,7 @@
}
}
-bool FormatSpecifier::hasValidAlternativeForm() const {
+bool PrintfSpecifier::hasValidAlternativeForm() const {
if (!HasAlternativeForm)
return true;
@@ -856,7 +494,7 @@
}
}
-bool FormatSpecifier::hasValidLeadingZeros() const {
+bool PrintfSpecifier::hasValidLeadingZeros() const {
if (!HasLeadingZeroes)
return true;
@@ -883,7 +521,7 @@
}
}
-bool FormatSpecifier::hasValidSpacePrefix() const {
+bool PrintfSpecifier::hasValidSpacePrefix() const {
if (!HasSpacePrefix)
return true;
@@ -906,7 +544,7 @@
}
}
-bool FormatSpecifier::hasValidLeftJustified() const {
+bool PrintfSpecifier::hasValidLeftJustified() const {
if (!IsLeftJustified)
return true;
@@ -920,7 +558,7 @@
}
}
-bool FormatSpecifier::hasValidLengthModifier() const {
+bool PrintfSpecifier::hasValidLengthModifier() const {
switch (LM.getKind()) {
case LengthModifier::None:
return true;
@@ -988,7 +626,7 @@
return false;
}
-bool FormatSpecifier::hasValidPrecision() const {
+bool PrintfSpecifier::hasValidPrecision() const {
if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
return true;
@@ -1015,7 +653,7 @@
return false;
}
}
-bool FormatSpecifier::hasValidFieldWidth() const {
+bool PrintfSpecifier::hasValidFieldWidth() const {
if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
return true;