Bug 7377: Fixed several bad printf format string bugs.
- Added warning for undefined behavior when using field specifier
- Added warning for undefined behavior when using length modifier
- Fixed warnings for invalid flags
- Added warning for ignored flags
- Added fixits for the above warnings
- Fixed accuracy of detecting several undefined behavior conditions
- Receive normal warnings in addition to security warnings when using %n
- Fix bug where '+' flag would remain on unsigned conversion suggestions

Summary of changes:
- Added expanded tests
- Added/expanded warnings
- Added position info to OptionalAmounts for fixits
- Extracted optional flags to a wrapper class with position info for fixits
- Added several methods to validate a FormatSpecifier by component, each checking for undefined behavior
- Fixed conversion specifier checking to conform to C99 standard
- Added hooks to detect the invalid states in CheckPrintfHandler::HandleFormatSpecifier

Note: warnings involving the ' ' (space) flag are temporarily disabled until whitespace highlighting no longer triggers assertions. I will make a post about this on cfe-dev shortly.

M    test/Sema/format-strings.c
M    include/clang/Basic/DiagnosticSemaKinds.td
M    include/clang/Analysis/Analyses/PrintfFormatString.h
M    lib/Analysis/PrintfFormatString.cpp
M    lib/Sema/SemaChecking.cpp


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@106233 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
index de84af6..ba32e74 100644
--- a/lib/Analysis/PrintfFormatString.cpp
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -83,7 +83,8 @@
     }
 
     if (hasDigits)
-      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, 0);
+      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
+          false);
 
     break;
   }
@@ -95,7 +96,7 @@
                                              unsigned &argIndex) {
   if (*Beg == '*') {
     ++Beg;
-    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0);
+    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
   }
 
   return ParseAmount(Beg, E);
@@ -135,7 +136,7 @@
       Beg = ++I;
 
       return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
-                            Tmp, 1);
+                            Tmp, 0, true);
     }
 
     H.HandleInvalidPosition(Beg, I - Beg, p);
@@ -261,11 +262,11 @@
   for ( ; I != E; ++I) {
     switch (*I) {
       default: hasMore = false; break;
-      case '-': FS.setIsLeftJustified(); break;
-      case '+': FS.setHasPlusPrefix(); break;
-      case ' ': FS.setHasSpacePrefix(); break;
-      case '#': FS.setHasAlternativeForm(); break;
-      case '0': FS.setHasLeadingZeros(); break;
+      case '-': FS.setIsLeftJustified(I); break;
+      case '+': FS.setHasPlusPrefix(I); break;
+      case ' ': FS.setHasSpacePrefix(I); break;
+      case '#': FS.setHasAlternativeForm(I); break;
+      case '0': FS.setHasLeadingZeros(I); break;
     }
     if (!hasMore)
       break;
@@ -616,12 +617,12 @@
     return;
   case Arg:
     if (usesPositionalArg())
-      os << ".*" << getPositionalArgIndex() << "$";
+      os << "*" << getPositionalArgIndex() << "$";
     else
-      os << ".*";
+      os << "*";
     break;
   case Constant:
-    os << "." << amt;
+    os << amt;
     break;
   }
 }
@@ -749,6 +750,7 @@
     Precision.setHowSpecified(OptionalAmount::NotSpecified);
     HasAlternativeForm = 0;
     HasLeadingZeroes = 0;
+    HasPlusPrefix = 0;
   }
   // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
   else if (QT->isFloatingType()) {
@@ -759,6 +761,7 @@
     Precision.setHowSpecified(OptionalAmount::NotSpecified);
     HasAlternativeForm = 0;
     HasLeadingZeroes = 0;
+    HasPlusPrefix = 0;
   }
   else if (QT->isSignedIntegerType()) {
     CS.setKind(ConversionSpecifier::dArg);
@@ -767,6 +770,7 @@
   else if (QT->isUnsignedIntegerType()) {
     CS.setKind(ConversionSpecifier::uArg);
     HasAlternativeForm = 0;
+    HasPlusPrefix = 0;
   }
   else {
     return false;
@@ -801,3 +805,222 @@
   // Conversion specifier
   os << CS.toString();
 }
+
+bool FormatSpecifier::hasValidPlusPrefix() const {
+  if (!HasPlusPrefix)
+    return true;
+
+  // The plus prefix only makes sense for signed conversions
+  switch (CS.getKind()) {
+  case ConversionSpecifier::dArg:
+  case ConversionSpecifier::iArg:
+  case ConversionSpecifier::fArg:
+  case ConversionSpecifier::FArg:
+  case ConversionSpecifier::eArg:
+  case ConversionSpecifier::EArg:
+  case ConversionSpecifier::gArg:
+  case ConversionSpecifier::GArg:
+  case ConversionSpecifier::aArg:
+  case ConversionSpecifier::AArg:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool FormatSpecifier::hasValidAlternativeForm() const {
+  if (!HasAlternativeForm)
+    return true;
+
+  // Alternate form flag only valid with the oxaAeEfFgG conversions
+  switch (CS.getKind()) {
+  case ConversionSpecifier::oArg:
+  case ConversionSpecifier::xArg:
+  case ConversionSpecifier::aArg:
+  case ConversionSpecifier::AArg:
+  case ConversionSpecifier::eArg:
+  case ConversionSpecifier::EArg:
+  case ConversionSpecifier::fArg:
+  case ConversionSpecifier::FArg:
+  case ConversionSpecifier::gArg:
+  case ConversionSpecifier::GArg:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool FormatSpecifier::hasValidLeadingZeros() const {
+  if (!HasLeadingZeroes)
+    return true;
+
+  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
+  switch (CS.getKind()) {
+  case ConversionSpecifier::dArg:
+  case ConversionSpecifier::iArg:
+  case ConversionSpecifier::oArg:
+  case ConversionSpecifier::uArg:
+  case ConversionSpecifier::xArg:
+  case ConversionSpecifier::XArg:
+  case ConversionSpecifier::aArg:
+  case ConversionSpecifier::AArg:
+  case ConversionSpecifier::eArg:
+  case ConversionSpecifier::EArg:
+  case ConversionSpecifier::fArg:
+  case ConversionSpecifier::FArg:
+  case ConversionSpecifier::gArg:
+  case ConversionSpecifier::GArg:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool FormatSpecifier::hasValidSpacePrefix() const {
+  if (!HasSpacePrefix)
+    return true;
+
+  // The space prefix only makes sense for signed conversions
+  switch (CS.getKind()) {
+  case ConversionSpecifier::dArg:
+  case ConversionSpecifier::iArg:
+  case ConversionSpecifier::fArg:
+  case ConversionSpecifier::FArg:
+  case ConversionSpecifier::eArg:
+  case ConversionSpecifier::EArg:
+  case ConversionSpecifier::gArg:
+  case ConversionSpecifier::GArg:
+  case ConversionSpecifier::aArg:
+  case ConversionSpecifier::AArg:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool FormatSpecifier::hasValidLeftJustified() const {
+  if (!IsLeftJustified)
+    return true;
+
+  // The left justified flag is valid for all conversions except n
+  switch (CS.getKind()) {
+  case ConversionSpecifier::OutIntPtrArg:
+    return false;
+
+  default:
+    return true;
+  }
+}
+
+bool FormatSpecifier::hasValidLengthModifier() const {
+  switch (LM.getKind()) {
+  case LengthModifier::None:
+    return true;
+
+  // Handle most integer flags
+  case LengthModifier::AsChar:
+  case LengthModifier::AsShort:
+  case LengthModifier::AsLongLong:
+  case LengthModifier::AsIntMax:
+  case LengthModifier::AsSizeT:
+  case LengthModifier::AsPtrDiff:
+    switch (CS.getKind()) {
+    case ConversionSpecifier::dArg:
+    case ConversionSpecifier::iArg:
+    case ConversionSpecifier::oArg:
+    case ConversionSpecifier::uArg:
+    case ConversionSpecifier::xArg:
+    case ConversionSpecifier::XArg:
+    case ConversionSpecifier::OutIntPtrArg:
+      return true;
+    default:
+      return false;
+    }
+
+  // Handle 'l' flag
+  case LengthModifier::AsLong:
+    switch (CS.getKind()) {
+    case ConversionSpecifier::dArg:
+    case ConversionSpecifier::iArg:
+    case ConversionSpecifier::oArg:
+    case ConversionSpecifier::uArg:
+    case ConversionSpecifier::xArg:
+    case ConversionSpecifier::XArg:
+    case ConversionSpecifier::aArg:
+    case ConversionSpecifier::AArg:
+    case ConversionSpecifier::fArg:
+    case ConversionSpecifier::FArg:
+    case ConversionSpecifier::eArg:
+    case ConversionSpecifier::EArg:
+    case ConversionSpecifier::gArg:
+    case ConversionSpecifier::GArg:
+    case ConversionSpecifier::OutIntPtrArg:
+    case ConversionSpecifier::IntAsCharArg:
+    case ConversionSpecifier::CStrArg:
+      return true;
+    default:
+      return false;
+    }
+
+  case LengthModifier::AsLongDouble:
+    switch (CS.getKind()) {
+    case ConversionSpecifier::aArg:
+    case ConversionSpecifier::AArg:
+    case ConversionSpecifier::fArg:
+    case ConversionSpecifier::FArg:
+    case ConversionSpecifier::eArg:
+    case ConversionSpecifier::EArg:
+    case ConversionSpecifier::gArg:
+    case ConversionSpecifier::GArg:
+      return true;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
+bool FormatSpecifier::hasValidPrecision() const {
+  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
+    return true;
+
+  // Precision is only valid with the diouxXaAeEfFgGs conversions
+  switch (CS.getKind()) {
+  case ConversionSpecifier::dArg:
+  case ConversionSpecifier::iArg:
+  case ConversionSpecifier::oArg:
+  case ConversionSpecifier::uArg:
+  case ConversionSpecifier::xArg:
+  case ConversionSpecifier::XArg:
+  case ConversionSpecifier::aArg:
+  case ConversionSpecifier::AArg:
+  case ConversionSpecifier::eArg:
+  case ConversionSpecifier::EArg:
+  case ConversionSpecifier::fArg:
+  case ConversionSpecifier::FArg:
+  case ConversionSpecifier::gArg:
+  case ConversionSpecifier::GArg:
+  case ConversionSpecifier::CStrArg:
+    return true;
+
+  default:
+    return false;
+  }
+}
+bool FormatSpecifier::hasValidFieldWidth() const {
+  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
+      return true;
+
+  // The field width is valid for all conversions except n
+  switch (CS.getKind()) {
+  case ConversionSpecifier::OutIntPtrArg:
+    return false;
+
+  default:
+    return true;
+  }
+}
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 4ded4ef..2dc2c22 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -1200,9 +1200,17 @@
 
   bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k,
                     const char *startSpecifier, unsigned specifierLen);
-  void HandleFlags(const analyze_printf::FormatSpecifier &FS,
-                   llvm::StringRef flag, llvm::StringRef cspec,
-                   const char *startSpecifier, unsigned specifierLen);
+  void HandleInvalidAmount(const analyze_printf::FormatSpecifier &FS,
+                           const analyze_printf::OptionalAmount &Amt,
+                           unsigned type,
+                           const char *startSpecifier, unsigned specifierLen);
+  void HandleFlag(const analyze_printf::FormatSpecifier &FS,
+                  const analyze_printf::OptionalFlag &flag,
+                  const char *startSpecifier, unsigned specifierLen);
+  void HandleIgnoredFlag(const analyze_printf::FormatSpecifier &FS,
+                         const analyze_printf::OptionalFlag &ignoredFlag,
+                         const analyze_printf::OptionalFlag &flag,
+                         const char *startSpecifier, unsigned specifierLen);
 
   const Expr *getDataArg(unsigned i) const;
 };
@@ -1287,16 +1295,6 @@
   return TheCall->getArg(FirstDataArg + i);
 }
 
-void CheckPrintfHandler::HandleFlags(const analyze_printf::FormatSpecifier &FS,
-                                     llvm::StringRef flag,
-                                     llvm::StringRef cspec,
-                                     const char *startSpecifier,
-                                     unsigned specifierLen) {
-  const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier();
-  S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_nonsensical_flag)
-    << flag << cspec << getFormatSpecifierRange(startSpecifier, specifierLen);
-}
-
 bool
 CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
                                  unsigned k, const char *startSpecifier,
@@ -1341,6 +1339,62 @@
   return true;
 }
 
+void CheckPrintfHandler::HandleInvalidAmount(
+                                      const analyze_printf::FormatSpecifier &FS,
+                                      const analyze_printf::OptionalAmount &Amt,
+                                      unsigned type,
+                                      const char *startSpecifier,
+                                      unsigned specifierLen) {
+  const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier();
+  switch (Amt.getHowSpecified()) {
+  case analyze_printf::OptionalAmount::Constant:
+    S.Diag(getLocationOfByte(Amt.getStart()),
+        diag::warn_printf_nonsensical_optional_amount)
+      << type
+      << CS.toString()
+      << getFormatSpecifierRange(startSpecifier, specifierLen)
+      << FixItHint::CreateRemoval(getFormatSpecifierRange(Amt.getStart(),
+          Amt.getConstantLength()));
+    break;
+
+  default:
+    S.Diag(getLocationOfByte(Amt.getStart()),
+        diag::warn_printf_nonsensical_optional_amount)
+      << type
+      << CS.toString()
+      << getFormatSpecifierRange(startSpecifier, specifierLen);
+    break;
+  }
+}
+
+void CheckPrintfHandler::HandleFlag(const analyze_printf::FormatSpecifier &FS,
+                                    const analyze_printf::OptionalFlag &flag,
+                                    const char *startSpecifier,
+                                    unsigned specifierLen) {
+  // Warn about pointless flag with a fixit removal.
+  const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier();
+  S.Diag(getLocationOfByte(flag.getPosition()),
+      diag::warn_printf_nonsensical_flag)
+    << flag.toString() << CS.toString()
+    << getFormatSpecifierRange(startSpecifier, specifierLen)
+    << FixItHint::CreateRemoval(getFormatSpecifierRange(flag.getPosition(), 1));
+}
+
+void CheckPrintfHandler::HandleIgnoredFlag(
+                                const analyze_printf::FormatSpecifier &FS,
+                                const analyze_printf::OptionalFlag &ignoredFlag,
+                                const analyze_printf::OptionalFlag &flag,
+                                const char *startSpecifier,
+                                unsigned specifierLen) {
+  // Warn about ignored flag with a fixit removal.
+  S.Diag(getLocationOfByte(ignoredFlag.getPosition()),
+      diag::warn_printf_ignored_flag)
+    << ignoredFlag.toString() << flag.toString()
+    << getFormatSpecifierRange(startSpecifier, specifierLen)
+    << FixItHint::CreateRemoval(getFormatSpecifierRange(
+        ignoredFlag.getPosition(), 1));
+}
+
 bool
 CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
                                             &FS,
@@ -1395,34 +1449,61 @@
     return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
   }
 
-  // Are we using '%n'?  Issue a warning about this being
-  // a possible security issue.
+  // Check for invalid use of field width
+  if (!FS.hasValidFieldWidth()) {
+    HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 1,
+        startSpecifier, specifierLen);
+  }
+
+  // Check for invalid use of precision
+  if (!FS.hasValidPrecision()) {
+    HandleInvalidAmount(FS, FS.getPrecision(), /* precision */ 1,
+        startSpecifier, specifierLen);
+  }
+
+  // Check each flag does not conflict with any other component.
+  if (!FS.hasValidLeadingZeros())
+    HandleFlag(FS, FS.hasLeadingZeros(), startSpecifier, specifierLen);
+  if (!FS.hasValidPlusPrefix())
+    HandleFlag(FS, FS.hasPlusPrefix(), startSpecifier, specifierLen);
+  // FIXME: the following lines are disabled due to clang assertions on
+  // highlights containing spaces.
+  // if (!FS.hasValidSpacePrefix())
+  //   HandleFlag(FS, FS.hasSpacePrefix(), startSpecifier, specifierLen);
+  if (!FS.hasValidAlternativeForm())
+    HandleFlag(FS, FS.hasAlternativeForm(), startSpecifier, specifierLen);
+  if (!FS.hasValidLeftJustified())
+    HandleFlag(FS, FS.isLeftJustified(), startSpecifier, specifierLen);
+
+  // Check that flags are not ignored by another flag
+  // FIXME: the following lines are disabled due to clang assertions on
+  // highlights containing spaces.
+  //if (FS.hasSpacePrefix() && FS.hasPlusPrefix()) // ' ' ignored by '+'
+  //  HandleIgnoredFlag(FS, FS.hasSpacePrefix(), FS.hasPlusPrefix(),
+  //      startSpecifier, specifierLen);
+  if (FS.hasLeadingZeros() && FS.isLeftJustified()) // '0' ignored by '-'
+    HandleIgnoredFlag(FS, FS.hasLeadingZeros(), FS.isLeftJustified(),
+            startSpecifier, specifierLen);
+
+  // Check the length modifier is valid with the given conversion specifier.
+  const LengthModifier &LM = FS.getLengthModifier();
+  if (!FS.hasValidLengthModifier())
+    S.Diag(getLocationOfByte(LM.getStart()),
+        diag::warn_printf_nonsensical_length)
+      << LM.toString() << CS.toString()
+      << getFormatSpecifierRange(startSpecifier, specifierLen)
+      << FixItHint::CreateRemoval(getFormatSpecifierRange(LM.getStart(),
+          LM.getLength()));
+
+  // Are we using '%n'?
   if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) {
+    // Issue a warning about this being a possible security issue.
     S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
       << getFormatSpecifierRange(startSpecifier, specifierLen);
     // Continue checking the other format specifiers.
     return true;
   }
 
-  if (CS.getKind() == ConversionSpecifier::VoidPtrArg) {
-    if (FS.getPrecision().getHowSpecified() != OptionalAmount::NotSpecified)
-      S.Diag(getLocationOfByte(CS.getStart()),
-             diag::warn_printf_nonsensical_precision)
-        << CS.getCharacters()
-        << getFormatSpecifierRange(startSpecifier, specifierLen);
-  }
-  if (CS.getKind() == ConversionSpecifier::VoidPtrArg ||
-      CS.getKind() == ConversionSpecifier::CStrArg) {
-    // FIXME: Instead of using "0", "+", etc., eventually get them from
-    // the FormatSpecifier.
-    if (FS.hasLeadingZeros())
-      HandleFlags(FS, "0", CS.getCharacters(), startSpecifier, specifierLen);
-    if (FS.hasPlusPrefix())
-      HandleFlags(FS, "+", CS.getCharacters(), startSpecifier, specifierLen);
-    if (FS.hasSpacePrefix())
-      HandleFlags(FS, " ", CS.getCharacters(), startSpecifier, specifierLen);
-  }
-
   // The remaining checks depend on the data arguments.
   if (HasVAListArg)
     return true;