adb: win32: handle incomplete UTF-8 in console output, other fixes

Previously, the various adb_printf, adb_fwrite, etc. functions did not correctly handle the case of the passed buffer ending with an incomplete UTF-8 sequence. This is fixed by buffering up incomplete UTF-8 sequences in g_console_output_buffer (protected by the mutex g_console_output_buffer_lock) and outputting them later once the full sequence is available.

A unittest for the main worker function, ParseCompleteUTF8(), was added to adb_test.

Other fixes:

- Fix the return value of number-of-chars written to be the number of UTF-8 bytes instead of the number of UTF-16 characters.

- Don't overwrite errno in success cases of various adb_printf, etc. functions. This might be excessive, but might be useful in the case when these functions are used for debugging/tracing.

- Add missing UTF-8 stdio functions that aren't currently used by adb, but might be in the future: vprintf, putc, putchar, puts.

- stdin_raw_init: If we can't get the console handle, don't call SetConsoleMode(). Not a big deal, but this will prevent erroneous trace output.

Change-Id: I8730e8af92882c42b884ad921b39a17b54465085
Signed-off-by: Spencer Low <CompareAndSwap@gmail.com>

commit: a30b79a2d9af3850da23b62703bf3b76db076ab9 [log] [tgz]
author: Spencer Low <CompareAndSwap@gmail.com> Sun Nov 15 16:29:36 2015 -0800
committer: Spencer Low <CompareAndSwap@gmail.com> Thu Jan 28 16:31:28 2016 -0800
tree: 7cf773c2d80bf09a8c0e03bc4f6f671daf0c31f1
parent: 53529ecacdffbbffb5c9fbc4ab38103703dde32e [diff] [blame]
diff --git a/adb/sysdeps_win32_test.cpp b/adb/sysdeps_win32_test.cpp
index 1d40281..8f610cf 100755
--- a/adb/sysdeps_win32_test.cpp
+++ b/adb/sysdeps_win32_test.cpp

@@ -137,3 +137,60 @@
     // Make sure an invalid FD is handled correctly.
     EXPECT_EQ(0, unix_isatty(-1));
 }
+
+// Test helper: hands the byte range [buf, buf + buf_size) to internal::ParseCompleteUTF8() and
+// checks both of its results.
+//
+//   expected_complete_bytes:  the value ParseCompleteUTF8() is expected to return.
+//   expected_remaining_bytes: the expected contents of the vector passed as the output argument.
+void TestParseCompleteUTF8(const char* buf, const size_t buf_size,
+                           const size_t expected_complete_bytes,
+                           const std::vector<char>& expected_remaining_bytes) {
+    const char* const buf_end = buf + buf_size;
+
+    // A fresh, empty output vector is used for every call.
+    std::vector<char> remaining_bytes;
+    const size_t complete_bytes = internal::ParseCompleteUTF8(buf, buf_end, &remaining_bytes);
+
+    EXPECT_EQ(expected_complete_bytes, complete_bytes);
+    EXPECT_EQ(expected_remaining_bytes, remaining_bytes);
+}
+
+// Exercises internal::ParseCompleteUTF8() with buffers that end in complete, incomplete and
+// invalid UTF-8 data. For every prefix (empty, NUL, ASCII, and complete 2/3/4 byte sequences):
+//   - an incomplete multi-byte sequence at the end of the buffer must be reported as remaining
+//     bytes and must not be counted as complete;
+//   - once the last byte of that sequence is appended, the whole buffer must be complete;
+//   - stray trailing bytes must be counted as complete right away, leaving nothing remaining.
+TEST(sysdeps_win32, ParseCompleteUTF8) {
+    const std::vector<std::vector<char>> multi_byte_sequences = {
+        { '\xc2', '\xa9' },                 // 2 byte UTF-8 sequence
+        { '\xe1', '\xb4', '\xa8' },         // 3 byte UTF-8 sequence
+        { '\xf0', '\x9f', '\x98', '\x80' }, // 4 byte UTF-8 sequence
+    };
+    std::vector<std::vector<char>> all_sequences = {
+        {},                                 // 0 bytes
+        { '\0' },                           // NUL byte
+        { 'a' },                            // 1 byte UTF-8 sequence
+    };
+    all_sequences.insert(all_sequences.end(), multi_byte_sequences.begin(),
+                         multi_byte_sequences.end());
+
+    // How many stray trailing bytes are appended, one at a time, in the invalid-input check.
+    const size_t kStrayTrailingByteCount = 8;
+
+    // Vary a prefix of bytes in front of the sequence that we're actually interested in parsing.
+    for (const auto& prefix : all_sequences) {
+        // Parse (prefix + one byte of the sequence at a time)
+        for (const auto& seq : multi_byte_sequences) {
+            std::vector<char> buffer(prefix);
+
+            // For every byte of the sequence except the last. Written as `i + 1 < size()` so
+            // that an empty sequence could never make the unsigned bound wrap around.
+            for (size_t i = 0; i + 1 < seq.size(); ++i) {
+                buffer.push_back(seq[i]);
+
+                // When parsing an incomplete UTF-8 sequence, the amount of the buffer preceding
+                // the start of the incomplete UTF-8 sequence is valid. The remaining bytes are the
+                // bytes of the incomplete UTF-8 sequence.
+                TestParseCompleteUTF8(buffer.data(), buffer.size(), prefix.size(),
+                                      std::vector<char>(seq.begin(), seq.begin() + i + 1));
+            }
+
+            // For the last byte of the sequence
+            buffer.push_back(seq.back());
+            TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>());
+        }
+
+        // Parse (prefix (aka sequence) + invalid trailing bytes) to verify that the invalid
+        // trailing bytes are immediately "returned" to prevent them from being stuck in some
+        // buffer.
+        std::vector<char> buffer(prefix);
+        for (size_t i = 0; i < kStrayTrailingByteCount; ++i) {
+            // A char literal, like the sequences above, instead of the int 0x80: this avoids an
+            // out-of-range int-to-char narrowing where char is signed.
+            buffer.push_back('\x80'); // trailing byte
+            TestParseCompleteUTF8(buffer.data(), buffer.size(), buffer.size(), std::vector<char>());
+        }
+    }
+}
commit	a30b79a2d9af3850da23b62703bf3b76db076ab9	[log] [tgz]
author	Spencer Low <CompareAndSwap@gmail.com>	Sun Nov 15 16:29:36 2015 -0800
committer	Spencer Low <CompareAndSwap@gmail.com>	Thu Jan 28 16:31:28 2016 -0800
tree	7cf773c2d80bf09a8c0e03bc4f6f671daf0c31f1
parent	53529ecacdffbbffb5c9fbc4ab38103703dde32e [diff] [blame]