[Sema] Handle UTF-8 invalid format string specifiers
Improve invalid format string specifier handling by printing out
invalid specifiers characters with \x, \u and \U. Previously clang
would print gargabe whenever the character is unprintable.
Example, before:
NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier]
after:
NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier]
Differential Revision: http://reviews.llvm.org/D18296
rdar://problem/24672159
llvm-svn: 264752
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp
index 1c42ec0..badc710 100644
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp
@@ -15,6 +15,7 @@
#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +261,28 @@
return true;
}
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+ const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+ if (SpecifierBegin + 1 >= FmtStrEnd)
+ return false;
+
+ const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+ const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+ const char FirstByte = *SB;
+
+ // If the invalid specifier is a multibyte UTF-8 string, return the
+ // total length accordingly so that the conversion specifier can be
+ // properly updated to reflect a complete UTF-8 specifier.
+ unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+ if (NumBytes == 1)
+ return false;
+ if (SB + NumBytes > SE)
+ return false;
+
+ Len = NumBytes + 1;
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// Methods on ArgType.
//===----------------------------------------------------------------------===//