Nicer display of unprintable source, and fix caret display for non-ASCII text

Unprintable source in diagnostics is transformed to a printable form and then
displayed with reversed colors if possible. Unprintable characters are
displayed as <U+NNNN> while bytes that do not represent valid characters are
shown as <XX>.

Column adjustments to diagnostic carets, highlighted ranges, and fix-it hints
are made both for characters escaped as above and for characters which are
printable but take up more than a single column.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@154980 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/Misc/message-length.c b/test/Misc/message-length.c
index 3e69b6a..a6f4f44 100644
--- a/test/Misc/message-length.c
+++ b/test/Misc/message-length.c
@@ -27,9 +27,8 @@
 
 #pragma STDC CX_LIMITED_RANGE    // some long comment text and a brace, eh {}
 
-
 // CHECK: FILE:23:78
-// CHECK: {{^  ...// some long comment text and a brace, eh {} }}
+// CHECK: {{^  ...// some long comment text and a brace, eh {}}}
 
 struct A { int x; };
 void h(struct A *a) {
diff --git a/test/Misc/unprintable.c b/test/Misc/unprintable.c
new file mode 100644
index 0000000..860503e
--- /dev/null
+++ b/test/Misc/unprintable.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s 2>&1 | FileCheck -strict-whitespace %s
+
+int main() {
+    int i;
+    if((i==/*👿*/1));
+
+// CHECK: {{^    if\(\(i==/\*<U\+1F47F>\*/1\)\);}}
+
+// CHECK: {{^        ~\^~~~~~~~~~~~~~~~}}
+// CHECK: {{^       ~ \^               ~}}
+
+    /* 👿 */ "👿berhund";
+
+// CHECK: {{^    /\* <U\+1F47F> \*/ "<U\+1F47F>berhund";}}
+// CHECK: {{^                    \^~~~~~~~~~~~~~~~~~}}
+}
\ No newline at end of file
diff --git a/test/Misc/wrong-encoding.c b/test/Misc/wrong-encoding.c
new file mode 100644
index 0000000..bd1cf3d
--- /dev/null
+++ b/test/Misc/wrong-encoding.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s
+
+void foo() {
+
+  "§Ã"; // ø
+// CHECK: {{^  "<A7><C3>"; // <F8>}}
+// CHECK: {{^  \^}}
+
+  /* þ« */ const char *d = "¥";
+
+// CHECK: {{^  /\* <FE><AB> \*/ const char \*d = "<A5>";}}
+// CHECK: {{^                                 \^}}
+
+// CHECK: {{^  "<A7><C3>"; // <F8>}}
+// CHECK: {{^  \^~~~~~~~~~}}
+}