liblog: display valid utf8 characters with 'printable' log format This started as a change to use mbrtowc() instead of utf8_character_length() as mbrtowc() does everything that utf8_character_length() intends to do, but is a libc function. The change was further intended to add unit tests to ensure that these functions operate as intended. It turns out that utf8_character_length() returned an error for the utf8 characters that I tested, so this also has the side effect of allowing valid utf8 characters to be printed in the 'printable' log format, which was the original intention. Also, print the binary data as hex instead of octal, since it is a more suitable choice. Test: new unit tests, existing unit tests, logcat -v printable Change-Id: I4cc95aee81519411ef47892ca74eb31117c972d2

commit: 425317bf31806cc14e5c2fe79ff01a9ac64efc0c [log] [tgz]
author: Tom Cherry <tomcherry@google.com> Thu Apr 25 13:10:30 2019 -0700
committer: Tom Cherry <tomcherry@google.com> Mon Apr 29 09:14:37 2019 -0700
tree: 9bfb9e624749f84b4a25b444dbcfc3c2decb81ce
parent: 747015ce71a400bd28482e4d7b00dd473cce795a [diff]
diff --git a/liblog/logprint.cpp b/liblog/logprint.cpp
index cec5d96..6b5ea4c 100644
--- a/liblog/logprint.cpp
+++ b/liblog/logprint.cpp

@@ -33,6 +33,7 @@
 #include <string.h>
 #include <sys/param.h>
 #include <sys/types.h>
+#include <wchar.h>
 
 #include <cutils/list.h>
 #include <log/log.h>
@@ -1134,66 +1135,13 @@
 }
 
 /*
- * One utf8 character at a time
- *
- * Returns the length of the utf8 character in the buffer,
- * or -1 if illegal or truncated
- *
- * Open coded from libutils/Unicode.cpp, borrowed from utf8_length(),
- * can not remove from here because of library circular dependencies.
- * Expect one-day utf8_character_length with the same signature could
- * _also_ be part of libutils/Unicode.cpp if its usefullness needs to
- * propagate globally.
- */
-static ssize_t utf8_character_length(const char* src, size_t len) {
-  const char* cur = src;
-  const char first_char = *cur++;
-  static const uint32_t kUnicodeMaxCodepoint = 0x0010FFFF;
-  int32_t mask, to_ignore_mask;
-  size_t num_to_read;
-  uint32_t utf32;
-
-  if ((first_char & 0x80) == 0) { /* ASCII */
-    return first_char ? 1 : -1;
-  }
-
-  /*
-   * (UTF-8's character must not be like 10xxxxxx,
-   *  but 110xxxxx, 1110xxxx, ... or 1111110x)
-   */
-  if ((first_char & 0x40) == 0) {
-    return -1;
-  }
-
-  for (utf32 = 1, num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
-       num_to_read < 5 && (first_char & mask); num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
-    if (num_to_read > len) {
-      return -1;
-    }
-    if ((*cur & 0xC0) != 0x80) { /* can not be 10xxxxxx? */
-      return -1;
-    }
-    utf32 = (utf32 << 6) + (*cur++ & 0b00111111);
-  }
-  /* "first_char" must be (110xxxxx - 11110xxx) */
-  if (num_to_read >= 5) {
-    return -1;
-  }
-  to_ignore_mask |= mask;
-  utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
-  if (utf32 > kUnicodeMaxCodepoint) {
-    return -1;
-  }
-  return num_to_read;
-}
-
-/*
  * Convert to printable from message to p buffer, return string length. If p is
  * NULL, do not copy, but still return the expected string length.
  */
-static size_t convertPrintable(char* p, const char* message, size_t messageLen) {
+size_t convertPrintable(char* p, const char* message, size_t messageLen) {
   char* begin = p;
   bool print = p != NULL;
+  mbstate_t mb_state = {};
 
   while (messageLen) {
     char buf[6];
@@ -1201,11 +1149,10 @@
     if ((size_t)len > messageLen) {
       len = messageLen;
     }
-    len = utf8_character_length(message, len);
+    len = mbrtowc(nullptr, message, len, &mb_state);
 
     if (len < 0) {
-      snprintf(buf, sizeof(buf), ((messageLen > 1) && isdigit(message[1])) ? "\\%03o" : "\\%o",
-               *message & 0377);
+      snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
       len = 1;
     } else {
       buf[0] = '\0';
@@ -1225,7 +1172,7 @@
         } else if (*message == '\\') {
           strcpy(buf, "\\\\");
         } else if ((*message < ' ') || (*message & 0x80)) {
-          snprintf(buf, sizeof(buf), "\\%o", *message & 0377);
+          snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
         }
       }
       if (!buf[0]) {

diff --git a/liblog/tests/Android.bp b/liblog/tests/Android.bp
index 50755ce..d9d1a21 100644
--- a/liblog/tests/Android.bp
+++ b/liblog/tests/Android.bp

@@ -62,6 +62,7 @@
         "log_system_test.cpp",
         "log_time_test.cpp",
         "log_wrap_test.cpp",
+        "logprint_test.cpp",
     ],
     shared_libs: [
         "libcutils",

diff --git a/liblog/tests/logprint_test.cpp b/liblog/tests/logprint_test.cpp
new file mode 100644
index 0000000..7ca02ac
--- /dev/null
+++ b/liblog/tests/logprint_test.cpp

@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+size_t convertPrintable(char* p, const char* message, size_t messageLen);
+
+TEST(liblog, convertPrintable_ascii) {
+  auto input = "easy string, output same";
+  auto output_size = convertPrintable(nullptr, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(input));
+
+  char output[output_size];
+
+  output_size = convertPrintable(output, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(input));
+  EXPECT_STREQ(input, output);
+}
+
+TEST(liblog, convertPrintable_escapes) {
+  // Note that \t is not escaped.
+  auto input = "escape\a\b\t\v\f\r\\";
+  auto expected_output = "escape\\a\\b\t\\v\\f\\r\\\\";
+  auto output_size = convertPrintable(nullptr, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+
+  char output[output_size];
+
+  output_size = convertPrintable(output, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+  EXPECT_STREQ(expected_output, output);
+}
+
+TEST(liblog, convertPrintable_validutf8) {
+  auto input = u8"¢ह€𐍈";
+  auto output_size = convertPrintable(nullptr, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(input));
+
+  char output[output_size];
+
+  output_size = convertPrintable(output, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(input));
+  EXPECT_STREQ(input, output);
+}
+
+TEST(liblog, convertPrintable_invalidutf8) {
+  auto input = "\x80\xC2\x01\xE0\xA4\x06\xE0\x06\xF0\x90\x8D\x06\xF0\x90\x06\xF0\x0E";
+  auto expected_output =
+      "\\x80\\xC2\\x01\\xE0\\xA4\\x06\\xE0\\x06\\xF0\\x90\\x8D\\x06\\xF0\\x90\\x06\\xF0\\x0E";
+  auto output_size = convertPrintable(nullptr, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+
+  char output[output_size];
+
+  output_size = convertPrintable(output, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+  EXPECT_STREQ(expected_output, output);
+}
+
+TEST(liblog, convertPrintable_mixed) {
+  auto input =
+      u8"\x80\xC2¢ह€𐍈\x01\xE0\xA4\x06¢ह€𐍈\xE0\x06\a\b\xF0\x90¢ह€𐍈\x8D\x06\xF0\t\t\x90\x06\xF0\x0E";
+  auto expected_output =
+      u8"\\x80\\xC2¢ह€𐍈\\x01\\xE0\\xA4\\x06¢ह€𐍈\\xE0\\x06\\a\\b\\xF0\\x90¢ह€𐍈\\x8D\\x06\\xF0\t\t"
+      u8"\\x90\\x06\\xF0\\x0E";
+  auto output_size = convertPrintable(nullptr, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+
+  char output[output_size];
+
+  output_size = convertPrintable(output, input, strlen(input));
+  EXPECT_EQ(output_size, strlen(expected_output));
+  EXPECT_STREQ(expected_output, output);
+}
commit	425317bf31806cc14e5c2fe79ff01a9ac64efc0c	[log] [tgz]
author	Tom Cherry <tomcherry@google.com>	Thu Apr 25 13:10:30 2019 -0700
committer	Tom Cherry <tomcherry@google.com>	Mon Apr 29 09:14:37 2019 -0700
tree	9bfb9e624749f84b4a25b444dbcfc3c2decb81ce
parent	747015ce71a400bd28482e4d7b00dd473cce795a [diff]