[Sema] Handle UTF-8 invalid format string specifiers Improve invalid format string specifier handling by printing invalid specifier characters using \x, \u and \U escapes. Previously clang would print garbage whenever the character was unprintable. For example, before: NSLog(@"%\u25B9"); => warning: invalid conversion specifier ' [-Wformat-invalid-specifier] after: NSLog(@"%\u25B9"); => warning: invalid conversion specifier '\u25b9' [-Wformat-invalid-specifier] Differential Revision: http://reviews.llvm.org/D18296 rdar://problem/24672159 llvm-svn: 264752

commit: 0c18d03d9157090cb379219a8b91f3104869f358 [log] [tgz]
author: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> Tue Mar 29 17:35:02 2016 +0000
committer: Bruno Cardoso Lopes <bruno.cardoso@gmail.com> Tue Mar 29 17:35:02 2016 +0000
tree: 4691c44c47d7b0f79928619e73b170efb9047c65
parent: ac400900da8e0f756a73739a85151b01e42500ea [diff] [blame]
diff --git a/clang/lib/Analysis/FormatString.cpp b/clang/lib/Analysis/FormatString.cpp
index 1c42ec0..badc710 100644
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp

@@ -15,6 +15,7 @@
 #include "FormatStringParsing.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
 
 using clang::analyze_format_string::ArgType;
 using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +261,28 @@
   return true;
 }
 
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+    const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+  if (SpecifierBegin + 1 >= FmtStrEnd) // No byte left after SpecifierBegin.
+    return false;
+
+  const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+  const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+  const char FirstByte = *SB; // Safe to read: SB < SE was checked above.
+
+  // If the invalid specifier is a multibyte UTF-8 string, return the
+  // total length accordingly so that the conversion specifier can be
+  // properly updated to reflect a complete UTF-8 specifier. Len is only
+  // written on the success path; every early return leaves it untouched.
+  unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+  if (NumBytes == 1) // Not a multibyte sequence per getNumBytesForUTF8.
+    return false;
+  if (SB + NumBytes > SE) // Sequence would extend past FmtStrEnd.
+    return false;
+
+  Len = NumBytes + 1; // +1 also counts the byte at SpecifierBegin itself.
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Methods on ArgType.
 //===----------------------------------------------------------------------===//
commit	0c18d03d9157090cb379219a8b91f3104869f358	[log] [tgz]
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	Tue Mar 29 17:35:02 2016 +0000
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>	Tue Mar 29 17:35:02 2016 +0000
tree	4691c44c47d7b0f79928619e73b170efb9047c65
parent	ac400900da8e0f756a73739a85151b01e42500ea [diff] [blame]