Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <windows.h> |
| 18 | |
Elliott Hughes | 4f71319 | 2015-12-04 22:00:26 -0800 | [diff] [blame] | 19 | #include "android-base/utf8.h" |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 20 | |
| 21 | #include <fcntl.h> |
Renaud Paquay | e3e7813 | 2017-05-22 15:24:35 -0700 | [diff] [blame] | 22 | #include <stdio.h> |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 23 | |
Renaud Paquay | e3e7813 | 2017-05-22 15:24:35 -0700 | [diff] [blame] | 24 | #include <algorithm> |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 25 | #include <string> |
| 26 | |
Elliott Hughes | 4f71319 | 2015-12-04 22:00:26 -0800 | [diff] [blame] | 27 | #include "android-base/logging.h" |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 28 | |
| 29 | namespace android { |
| 30 | namespace base { |
| 31 | |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 32 | // Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar(). |
| 33 | static void SetErrnoFromLastError() { |
| 34 | switch (GetLastError()) { |
| 35 | case ERROR_NO_UNICODE_TRANSLATION: |
| 36 | errno = EILSEQ; |
| 37 | break; |
| 38 | default: |
| 39 | errno = EINVAL; |
| 40 | break; |
| 41 | } |
| 42 | } |
| 43 | |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 44 | bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) { |
| 45 | utf8->clear(); |
| 46 | |
| 47 | if (size == 0) { |
| 48 | return true; |
| 49 | } |
| 50 | |
| 51 | // TODO: Consider using std::wstring_convert once libcxx is supported on |
| 52 | // Windows. |
| 53 | |
| 54 | // Only Vista or later has this flag that causes WideCharToMultiByte() to |
| 55 | // return an error on invalid characters. |
| 56 | const DWORD flags = |
| 57 | #if (WINVER >= 0x0600) |
| 58 | WC_ERR_INVALID_CHARS; |
| 59 | #else |
| 60 | 0; |
| 61 | #endif |
| 62 | |
| 63 | const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size, |
| 64 | NULL, 0, NULL, NULL); |
| 65 | if (chars_required <= 0) { |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 66 | SetErrnoFromLastError(); |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 67 | return false; |
| 68 | } |
| 69 | |
| 70 | // This could potentially throw a std::bad_alloc exception. |
| 71 | utf8->resize(chars_required); |
| 72 | |
| 73 | const int result = WideCharToMultiByte(CP_UTF8, flags, utf16, size, |
| 74 | &(*utf8)[0], chars_required, NULL, |
| 75 | NULL); |
| 76 | if (result != chars_required) { |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 77 | SetErrnoFromLastError(); |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 78 | CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result |
| 79 | << " chars to buffer of " << chars_required << " chars"; |
| 80 | utf8->clear(); |
| 81 | return false; |
| 82 | } |
| 83 | |
| 84 | return true; |
| 85 | } |
| 86 | |
| 87 | bool WideToUTF8(const wchar_t* utf16, std::string* utf8) { |
| 88 | // Compute string length of NULL-terminated string with wcslen(). |
| 89 | return WideToUTF8(utf16, wcslen(utf16), utf8); |
| 90 | } |
| 91 | |
| 92 | bool WideToUTF8(const std::wstring& utf16, std::string* utf8) { |
| 93 | // Use the stored length of the string which allows embedded NULL characters |
| 94 | // to be converted. |
| 95 | return WideToUTF8(utf16.c_str(), utf16.length(), utf8); |
| 96 | } |
| 97 | |
| 98 | // Internal helper function that takes MultiByteToWideChar() flags. |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 99 | static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16, |
| 100 | const DWORD flags) { |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 101 | utf16->clear(); |
| 102 | |
| 103 | if (size == 0) { |
| 104 | return true; |
| 105 | } |
| 106 | |
| 107 | // TODO: Consider using std::wstring_convert once libcxx is supported on |
| 108 | // Windows. |
| 109 | const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size, |
| 110 | NULL, 0); |
| 111 | if (chars_required <= 0) { |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 112 | SetErrnoFromLastError(); |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 113 | return false; |
| 114 | } |
| 115 | |
| 116 | // This could potentially throw a std::bad_alloc exception. |
| 117 | utf16->resize(chars_required); |
| 118 | |
| 119 | const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size, |
| 120 | &(*utf16)[0], chars_required); |
| 121 | if (result != chars_required) { |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 122 | SetErrnoFromLastError(); |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 123 | CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result |
| 124 | << " chars to buffer of " << chars_required << " chars"; |
| 125 | utf16->clear(); |
| 126 | return false; |
| 127 | } |
| 128 | |
| 129 | return true; |
| 130 | } |
| 131 | |
| 132 | bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) { |
| 133 | // If strictly interpreting as UTF-8 succeeds, return success. |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 134 | if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) { |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 135 | return true; |
| 136 | } |
| 137 | |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 138 | const int saved_errno = errno; |
| 139 | |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 140 | // Fallback to non-strict interpretation, allowing invalid characters and |
| 141 | // converting as best as possible, and return false to signify a problem. |
Spencer Low | d21dc82 | 2015-11-12 15:20:15 -0800 | [diff] [blame] | 142 | (void)UTF8ToWideWithFlags(utf8, size, utf16, 0); |
| 143 | errno = saved_errno; |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 144 | return false; |
| 145 | } |
| 146 | |
| 147 | bool UTF8ToWide(const char* utf8, std::wstring* utf16) { |
| 148 | // Compute string length of NULL-terminated string with strlen(). |
| 149 | return UTF8ToWide(utf8, strlen(utf8), utf16); |
| 150 | } |
| 151 | |
| 152 | bool UTF8ToWide(const std::string& utf8, std::wstring* utf16) { |
| 153 | // Use the stored length of the string which allows embedded NULL characters |
| 154 | // to be converted. |
| 155 | return UTF8ToWide(utf8.c_str(), utf8.length(), utf16); |
| 156 | } |
| 157 | |
// Returns true when `c` is an ASCII letter, i.e. in the range 'A'-'Z' or 'a'-'z'.
static bool isDriveLetter(wchar_t c) {
  if (c >= L'A' && c <= L'Z') {
    return true;
  }
  return c >= L'a' && c <= L'z';
}
| 161 | |
| 162 | bool UTF8PathToWindowsLongPath(const char* utf8, std::wstring* utf16) { |
| 163 | if (!UTF8ToWide(utf8, utf16)) { |
| 164 | return false; |
| 165 | } |
| 166 | // Note: Although most Win32 File I/O API are limited to MAX_PATH (260 |
| 167 | // characters), the CreateDirectory API is limited to 248 characters. |
| 168 | if (utf16->length() >= 248) { |
| 169 | // If path is of the form "x:\" or "x:/" |
| 170 | if (isDriveLetter((*utf16)[0]) && (*utf16)[1] == L':' && |
| 171 | ((*utf16)[2] == L'\\' || (*utf16)[2] == L'/')) { |
| 172 | // Append long path prefix, and make sure there are no unix-style |
| 173 | // separators to ensure a fully compliant Win32 long path string. |
| 174 | utf16->insert(0, LR"(\\?\)"); |
| 175 | std::replace(utf16->begin(), utf16->end(), L'/', L'\\'); |
| 176 | } |
| 177 | } |
| 178 | return true; |
| 179 | } |
| 180 | |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 181 | // Versions of standard library APIs that support UTF-8 strings. |
| 182 | namespace utf8 { |
| 183 | |
Renaud Paquay | e3e7813 | 2017-05-22 15:24:35 -0700 | [diff] [blame] | 184 | FILE* fopen(const char* name, const char* mode) { |
| 185 | std::wstring name_utf16; |
| 186 | if (!UTF8PathToWindowsLongPath(name, &name_utf16)) { |
| 187 | return nullptr; |
| 188 | } |
| 189 | |
| 190 | std::wstring mode_utf16; |
| 191 | if (!UTF8ToWide(mode, &mode_utf16)) { |
| 192 | return nullptr; |
| 193 | } |
| 194 | |
| 195 | return _wfopen(name_utf16.c_str(), mode_utf16.c_str()); |
| 196 | } |
| 197 | |
| 198 | int mkdir(const char* name, mode_t mode) { |
| 199 | std::wstring name_utf16; |
| 200 | if (!UTF8PathToWindowsLongPath(name, &name_utf16)) { |
| 201 | return -1; |
| 202 | } |
| 203 | |
| 204 | return _wmkdir(name_utf16.c_str()); |
| 205 | } |
| 206 | |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 207 | int open(const char* name, int flags, ...) { |
| 208 | std::wstring name_utf16; |
Renaud Paquay | e3e7813 | 2017-05-22 15:24:35 -0700 | [diff] [blame] | 209 | if (!UTF8PathToWindowsLongPath(name, &name_utf16)) { |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 210 | return -1; |
| 211 | } |
| 212 | |
| 213 | int mode = 0; |
| 214 | if ((flags & O_CREAT) != 0) { |
| 215 | va_list args; |
| 216 | va_start(args, flags); |
| 217 | mode = va_arg(args, int); |
| 218 | va_end(args); |
| 219 | } |
| 220 | |
| 221 | return _wopen(name_utf16.c_str(), flags, mode); |
| 222 | } |
| 223 | |
| 224 | int unlink(const char* name) { |
| 225 | std::wstring name_utf16; |
Renaud Paquay | e3e7813 | 2017-05-22 15:24:35 -0700 | [diff] [blame] | 226 | if (!UTF8PathToWindowsLongPath(name, &name_utf16)) { |
Elliott Hughes | c1fd492 | 2015-11-11 18:02:29 +0000 | [diff] [blame] | 227 | return -1; |
| 228 | } |
| 229 | |
| 230 | return _wunlink(name_utf16.c_str()); |
| 231 | } |
| 232 | |
| 233 | } // namespace utf8 |
| 234 | } // namespace base |
| 235 | } // namespace android |