ART: Print out hex for NewStringUTF failure Print out the input data as a hex stream in case of a NewStringUTF input format failure. Bug: 22773761 Change-Id: I99a275bebb89564b2d71f297c7f5b9543cf4312d

commit: ef4afe9dc2e13500c3057aaf7f697e654b0b2782 [log] [tgz]
author: Andreas Gampe <agampe@google.com> Mon Jul 27 21:03:25 2015 -0700
committer: Andreas Gampe <agampe@google.com> Mon Jul 27 21:03:25 2015 -0700
tree: 9a11cff99fa435ad519a20b42cbb3f9232c24b79
parent: 9578d10b5866bfab63cc4218ec5ab3f6dd10c8ce [diff] [blame]
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 40b3669..38bc818 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc

@@ -16,6 +16,7 @@
 
 #include "check_jni.h"
 
+#include <iomanip>
 #include <sys/mman.h>
 #include <zlib.h>
 
@@ -1083,10 +1084,29 @@
     }
 
     const char* errorKind = nullptr;
-    uint8_t utf8 = CheckUtfBytes(bytes, &errorKind);
+    const uint8_t* utf8 = CheckUtfBytes(bytes, &errorKind);
     if (errorKind != nullptr) {
+      // This is an expensive loop that will resize often, but this isn't supposed to hit in
+      // practice anyways.
+      std::ostringstream oss;
+      oss << std::hex;
+      const uint8_t* tmp = reinterpret_cast<const uint8_t*>(bytes);
+      while (*tmp != 0) {
+        if (tmp == utf8) {
+          oss << "<";
+        }
+        oss << "0x" << std::setfill('0') << std::setw(2) << static_cast<uint32_t>(*tmp);
+        if (tmp == utf8) {
+          oss << '>';
+        }
+        tmp++;
+        if (*tmp != 0) {
+          oss << ' ';
+        }
+      }
+
       AbortF("input is not valid Modified UTF-8: illegal %s byte %#x\n"
-          "    string: '%s'", errorKind, utf8, bytes);
+          "    string: '%s'\n    input: '%s'", errorKind, *utf8, bytes, oss.str().c_str());
       return false;
     }
     return true;
@@ -1094,11 +1114,11 @@
 
   // Checks whether |bytes| is valid modified UTF-8. We also accept 4 byte UTF
   // sequences in place of encoded surrogate pairs.
-  static uint8_t CheckUtfBytes(const char* bytes, const char** errorKind) {
+  static const uint8_t* CheckUtfBytes(const char* bytes, const char** errorKind) {
     while (*bytes != '\0') {
-      uint8_t utf8 = *(bytes++);
+      const uint8_t* utf8 = reinterpret_cast<const uint8_t*>(bytes++);
       // Switch on the high four bits.
-      switch (utf8 >> 4) {
+      switch (*utf8 >> 4) {
       case 0x00:
       case 0x01:
       case 0x02:
@@ -1118,11 +1138,11 @@
         return utf8;
       case 0x0f:
         // Bit pattern 1111, which might be the start of a 4 byte sequence.
-        if ((utf8 & 0x08) == 0) {
+        if ((*utf8 & 0x08) == 0) {
           // Bit pattern 1111 0xxx, which is the start of a 4 byte sequence.
           // We consume one continuation byte here, and fall through to consume two more.
-          utf8 = *(bytes++);
-          if ((utf8 & 0xc0) != 0x80) {
+          utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+          if ((*utf8 & 0xc0) != 0x80) {
             *errorKind = "continuation";
             return utf8;
           }
@@ -1135,8 +1155,8 @@
         FALLTHROUGH_INTENDED;
       case 0x0e:
         // Bit pattern 1110, so there are two additional bytes.
-        utf8 = *(bytes++);
-        if ((utf8 & 0xc0) != 0x80) {
+        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+        if ((*utf8 & 0xc0) != 0x80) {
           *errorKind = "continuation";
           return utf8;
         }
@@ -1146,8 +1166,8 @@
       case 0x0c:
       case 0x0d:
         // Bit pattern 110x, so there is one additional byte.
-        utf8 = *(bytes++);
-        if ((utf8 & 0xc0) != 0x80) {
+        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
+        if ((*utf8 & 0xc0) != 0x80) {
           *errorKind = "continuation";
           return utf8;
         }
commit	ef4afe9dc2e13500c3057aaf7f697e654b0b2782	[log] [tgz]
author	Andreas Gampe <agampe@google.com>	Mon Jul 27 21:03:25 2015 -0700
committer	Andreas Gampe <agampe@google.com>	Mon Jul 27 21:03:25 2015 -0700
tree	9a11cff99fa435ad519a20b42cbb3f9232c24b79
parent	9578d10b5866bfab63cc4218ec5ab3f6dd10c8ce [diff] [blame]