license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame] | 1 | // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | |
| 5 | #include "base/file_util.h" |
| 6 | |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame^] | 7 | #include <stdio.h> |
| 8 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 9 | #include <fstream> |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 10 | |
| 11 | #include "base/logging.h" |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 12 | #include "base/string_util.h" |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 13 | #include "unicode/uniset.h" |
| 14 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 15 | namespace file_util { |
| 16 | |
// Character that introduces a file name's extension; InsertBeforeExtension()
// searches for its last occurrence in a path.
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 17 | const wchar_t kExtensionSeparator = L'.'; |
| 18 | |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 19 | void PathComponents(const std::wstring& path, |
| 20 | std::vector<std::wstring>* components) { |
| 21 | DCHECK(components != NULL); |
| 22 | if (components == NULL) |
| 23 | return; |
| 24 | std::wstring::size_type start = 0; |
| 25 | std::wstring::size_type end = path.find(kPathSeparator, start); |
| 26 | |
| 27 | // Special case the "/" or "\" directory. On Windows with a drive letter, |
| 28 | // this code path won't hit, but the right thing should still happen. |
| 29 | // "E:\foo" will turn into "E:","foo". |
| 30 | if (end == start) { |
| 31 | components->push_back(std::wstring(path, 0, 1)); |
| 32 | start = end + 1; |
| 33 | end = path.find(kPathSeparator, start); |
| 34 | } |
| 35 | while (end != std::wstring::npos) { |
| 36 | std::wstring component = std::wstring(path, start, end - start); |
| 37 | components->push_back(component); |
| 38 | start = end + 1; |
| 39 | end = path.find(kPathSeparator, start); |
| 40 | } |
| 41 | std::wstring component = std::wstring(path, start); |
| 42 | components->push_back(component); |
| 43 | } |
| 44 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 45 | bool EndsWithSeparator(std::wstring* path) { |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 46 | return EndsWithSeparator(*path); |
| 47 | } |
| 48 | |
| 49 | bool EndsWithSeparator(const std::wstring& path) { |
erikkay@google.com | 60b2dfc | 2008-08-16 03:00:48 +0900 | [diff] [blame] | 50 | bool is_sep = (path.length() > 0 && |
| 51 | (path)[path.length() - 1] == kPathSeparator); |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 52 | return is_sep; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 53 | } |
| 54 | |
| 55 | void TrimTrailingSeparator(std::wstring* dir) { |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 56 | while (dir->length() > 1 && EndsWithSeparator(dir)) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 57 | dir->resize(dir->length() - 1); |
| 58 | } |
| 59 | |
| 60 | void UpOneDirectory(std::wstring* dir) { |
| 61 | TrimTrailingSeparator(dir); |
| 62 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 63 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 64 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 65 | dir->resize(last_sep); |
| 66 | } |
| 67 | |
| 68 | void UpOneDirectoryOrEmpty(std::wstring* dir) { |
| 69 | TrimTrailingSeparator(dir); |
| 70 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 71 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 72 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 73 | dir->resize(last_sep); |
| 74 | else |
| 75 | dir->clear(); |
| 76 | } |
| 77 | |
| 78 | void TrimFilename(std::wstring* path) { |
| 79 | if (EndsWithSeparator(path)) { |
| 80 | TrimTrailingSeparator(path); |
| 81 | } else { |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 82 | std::wstring::size_type last_sep = path->find_last_of(kPathSeparator); |
| 83 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 84 | path->resize(last_sep); |
| 85 | } |
| 86 | } |
| 87 | |
// Returns the portion of |path| that follows the last '\' or '/'. When |path|
// contains neither character the whole string is returned; when it ends with
// one, the result is empty.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but win unit test
  // are exercising '/' as a path separator as well.
  const std::wstring::size_type last_sep = path.find_last_of(L"\\/");
  if (last_sep == std::wstring::npos)
    return path;
  return path.substr(last_sep + 1);
}
| 94 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 95 | std::wstring GetFileExtensionFromPath(const std::wstring& path) { |
| 96 | std::wstring file_name = GetFilenameFromPath(path); |
| 97 | std::wstring::size_type last_dot = file_name.rfind(L'.'); |
| 98 | return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | void AppendToPath(std::wstring* path, const std::wstring& new_ending) { |
| 102 | if (!path) { |
| 103 | NOTREACHED(); |
| 104 | return; // Don't crash in this function in release builds. |
| 105 | } |
| 106 | |
| 107 | if (!EndsWithSeparator(path)) |
| 108 | path->push_back(kPathSeparator); |
| 109 | path->append(new_ending); |
| 110 | } |
| 111 | |
| 112 | void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) { |
| 113 | DCHECK(path); |
| 114 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 115 | const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator); |
| 116 | const std::wstring::size_type last_sep = path->rfind(kPathSeparator); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 117 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 118 | if (last_dot == std::wstring::npos || |
| 119 | (last_sep != std::wstring::npos && last_dot < last_sep)) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 120 | // The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo". |
| 121 | // We should just append the suffix to the entire path. |
| 122 | path->append(suffix); |
| 123 | return; |
| 124 | } |
| 125 | |
| 126 | path->insert(last_dot, suffix); |
| 127 | } |
| 128 | |
// Trims leading and trailing whitespace from |*file_name| and then replaces
// each character found in the illegal set built below with |replace_char|.
// The set holds the listed ASCII punctuation, the Unicode categories Cc and
// Cf minus U+200C and U+200D, the range U+FDD0..U+FDEF, and the last two
// code points (xFFFE, xFFFF) of each of the 17 planes.
// |replace_char| is DCHECKed to be outside that set and below 0x10000.
// UTF-16 builds assemble the result in a temporary string and swap it in;
// UTF-32 builds overwrite the offending characters in place.
| 129 | void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { |
| 130 | DCHECK(file_name); |
| 131 | |
ericroman@google.com | dbff4f5 | 2008-08-19 01:00:38 +0900 | [diff] [blame] | 132 | // Control characters, formatting characters, non-characters, and |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 133 | // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). |
| 134 | // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx |
| 135 | // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx |
| 136 | // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they |
| 137 | // are legitimate in Arabic and some S/SE Asian scripts. However, when used |
| 138 | // elsewhere, they can be confusing/problematic. |
| 139 | // Also, consider wrapping the set with our Singleton class to create and |
| 140 | // freeze it only once. Note that there's a trade-off between memory and |
| 141 | // speed. |
| 142 | |
| 143 | UErrorCode status = U_ZERO_ERROR; |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 144 | #if defined(WCHAR_T_IS_UTF16) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 145 | UnicodeSet illegal_characters(UnicodeString( |
| 146 | L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status); |
| 147 | #else |
| 148 | UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE( |
| 149 | "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status); |
| 150 | #endif |
| 151 | DCHECK(U_SUCCESS(status)); |
| 152 | // Add non-characters. If this becomes a performance bottleneck by |
| 153 | // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead. |
| 154 | illegal_characters.add(0xFDD0, 0xFDEF); |
// Planes 0 through 0x10: add the final two code points of each plane.
| 155 | for (int i = 0; i <= 0x10; ++i) { |
| 156 | int plane_base = 0x10000 * i; |
| 157 | illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF); |
| 158 | } |
| 159 | illegal_characters.freeze(); |
| 160 | DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000); |
| 161 | |
| 162 | // Remove leading and trailing whitespace. |
| 163 | TrimWhitespace(*file_name, TRIM_ALL, file_name); |
| 164 | |
// |i| indexes wchar_t units of the trimmed name; it is advanced by U16_NEXT
// in the UTF-16 branch and by ++i in the UTF-32 branch.
| 165 | std::wstring::size_type i = 0; |
| 166 | std::wstring::size_type length = file_name->size(); |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 167 | const wchar_t* wstr = file_name->data(); |
| 168 | #if defined(WCHAR_T_IS_UTF16) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 169 | // Using |span| method of UnicodeSet might speed things up a bit, but |
| 170 | // it's not likely to matter here. |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 171 | std::wstring temp; |
| 172 | temp.reserve(length); |
| 173 | while (i < length) { |
| 174 | UChar32 ucs4; |
// Remember where this code point starts so its original units can be copied.
| 175 | std::wstring::size_type prev = i; |
| 176 | U16_NEXT(wstr, i, length, ucs4); |
| 177 | if (illegal_characters.contains(ucs4)) { |
| 178 | temp.push_back(replace_char); |
| 179 | } else if (ucs4 < 0x10000) { |
| 180 | temp.push_back(ucs4); |
| 181 | } else { |
// Code point at or above 0x10000: copy the two original UTF-16 units.
| 182 | temp.push_back(wstr[prev]); |
| 183 | temp.push_back(wstr[prev + 1]); |
| 184 | } |
| 185 | } |
| 186 | file_name->swap(temp); |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 187 | #elif defined(WCHAR_T_IS_UTF32) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 188 | while (i < length) { |
| 189 | if (illegal_characters.contains(wstr[i])) { |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 190 | (*file_name)[i] = replace_char; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 191 | } |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 192 | ++i; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 193 | } |
| 194 | #else |
| 195 | #error wchar_t* should be either UTF-16 or UTF-32 |
| 196 | #endif |
| 197 | } |
| 198 | |
// Internal helper for ReplaceExtension. Appends |extension| to |*file|,
// supplying the leading '.' when |extension| does not start with one. An
// empty |extension| or a bare "." leaves |*file| untouched.
static void AppendExtension(const std::wstring& extension,
                            std::wstring* file) {
  if (extension.empty() || extension == L".")
    return;

  if (extension[0] != L'.')
    *file += L".";
  *file += extension;
}
| 210 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 211 | void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) { |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 212 | const std::wstring::size_type last_dot = file_name->rfind(L'.'); |
sky@google.com | 71e7c6f | 2008-09-20 02:32:18 +0900 | [diff] [blame] | 213 | if (last_dot == std::wstring::npos) { |
| 214 | // No extension, just append the supplied extension. |
| 215 | AppendExtension(extension, file_name); |
| 216 | return; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 217 | } |
sky@google.com | 71e7c6f | 2008-09-20 02:32:18 +0900 | [diff] [blame] | 218 | const std::wstring::size_type last_separator = |
| 219 | file_name->rfind(kPathSeparator); |
| 220 | if (last_separator != std::wstring::npos && last_dot < last_separator) { |
| 221 | // File name doesn't have extension, but one of the directories does; don't |
| 222 | // replace it, just append the supplied extension. For example |
| 223 | // 'c:\tmp.bar\foo'. |
| 224 | AppendExtension(extension, file_name); |
| 225 | return; |
| 226 | } |
| 227 | std::wstring result = file_name->substr(0, last_dot); |
| 228 | AppendExtension(extension, &result); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 229 | file_name->swap(result); |
| 230 | } |
| 231 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 232 | bool ContentsEqual(const std::wstring& filename1, |
| 233 | const std::wstring& filename2) { |
| 234 | // We open the file in binary format even if they are text files because |
| 235 | // we are just comparing that bytes are exactly same in both files and not |
| 236 | // doing anything smart with text formatting. |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 237 | #if defined(OS_WIN) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 238 | std::ifstream file1(filename1.c_str(), std::ios::in | std::ios::binary); |
| 239 | std::ifstream file2(filename2.c_str(), std::ios::in | std::ios::binary); |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 240 | #elif defined(OS_POSIX) |
| 241 | std::ifstream file1(WideToUTF8(filename1).c_str(), |
| 242 | std::ios::in | std::ios::binary); |
| 243 | std::ifstream file2(WideToUTF8(filename2).c_str(), |
| 244 | std::ios::in | std::ios::binary); |
| 245 | #endif |
| 246 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 247 | // Even if both files aren't openable (and thus, in some sense, "equal"), |
| 248 | // any unusable file yields a result of "false". |
| 249 | if (!file1.is_open() || !file2.is_open()) |
| 250 | return false; |
| 251 | |
| 252 | const int BUFFER_SIZE = 2056; |
| 253 | char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE]; |
| 254 | do { |
| 255 | file1.read(buffer1, BUFFER_SIZE); |
| 256 | file2.read(buffer2, BUFFER_SIZE); |
| 257 | |
| 258 | if ((file1.eof() && !file2.eof()) || |
| 259 | (!file1.eof() && file2.eof()) || |
| 260 | (file1.gcount() != file2.gcount()) || |
| 261 | (memcmp(buffer1, buffer2, file1.gcount()))) { |
| 262 | file1.close(); |
| 263 | file2.close(); |
| 264 | return false; |
| 265 | } |
| 266 | } while (!file1.eof() && !file2.eof()); |
| 267 | |
| 268 | file1.close(); |
| 269 | file2.close(); |
| 270 | return true; |
| 271 | } |
| 272 | |
| 273 | bool ReadFileToString(const std::wstring& path, std::string* contents) { |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame^] | 274 | FILE* file = OpenFile(path, "rb"); |
| 275 | if (!file) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 276 | return false; |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame^] | 277 | } |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 278 | |
| 279 | char buf[1 << 16]; |
| 280 | size_t len; |
| 281 | while ((len = fread(buf, 1, sizeof(buf), file)) > 0) { |
| 282 | contents->append(buf, len); |
| 283 | } |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame^] | 284 | CloseFile(file); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 285 | |
| 286 | return true; |
| 287 | } |
| 288 | |
darin@google.com | 7f479f2 | 2008-09-26 10:04:08 +0900 | [diff] [blame] | 289 | bool GetFileSize(const std::wstring& file_path, int64* file_size) { |
| 290 | FileInfo info; |
| 291 | if (!GetFileInfo(file_path, &info)) |
| 292 | return false; |
| 293 | *file_size = info.size; |
| 294 | return true; |
| 295 | } |
| 296 | |
// Closes |file| with fclose() and returns true on success. A null |file| is
// treated as already closed and reported as success, because passing a null
// stream to fclose() is undefined behaviour.
bool CloseFile(FILE* file) {
  if (file == NULL)
    return true;
  return fclose(file) == 0;
}
| 300 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 301 | } // namespace |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame] | 302 | |