// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 4 | |
#include "base/file_util.h"

#include <stdio.h>
#include <string.h>

#include <fstream>
#include <string>
#include <vector>

#include "base/file_path.h"
#include "base/logging.h"
#include "base/string_util.h"
#include "unicode/uniset.h"
| 15 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 16 | namespace file_util { |
| 17 | |
// Character searched for when locating the start of a file name's extension
// (see InsertBeforeExtension).
const wchar_t kExtensionSeparator = L'.';
| 19 | |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 20 | void PathComponents(const std::wstring& path, |
| 21 | std::vector<std::wstring>* components) { |
| 22 | DCHECK(components != NULL); |
| 23 | if (components == NULL) |
| 24 | return; |
| 25 | std::wstring::size_type start = 0; |
| 26 | std::wstring::size_type end = path.find(kPathSeparator, start); |
| 27 | |
| 28 | // Special case the "/" or "\" directory. On Windows with a drive letter, |
| 29 | // this code path won't hit, but the right thing should still happen. |
| 30 | // "E:\foo" will turn into "E:","foo". |
| 31 | if (end == start) { |
| 32 | components->push_back(std::wstring(path, 0, 1)); |
| 33 | start = end + 1; |
| 34 | end = path.find(kPathSeparator, start); |
| 35 | } |
| 36 | while (end != std::wstring::npos) { |
| 37 | std::wstring component = std::wstring(path, start, end - start); |
| 38 | components->push_back(component); |
| 39 | start = end + 1; |
| 40 | end = path.find(kPathSeparator, start); |
| 41 | } |
| 42 | std::wstring component = std::wstring(path, start); |
| 43 | components->push_back(component); |
| 44 | } |
| 45 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 46 | bool EndsWithSeparator(std::wstring* path) { |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 47 | return EndsWithSeparator(*path); |
| 48 | } |
| 49 | |
| 50 | bool EndsWithSeparator(const std::wstring& path) { |
erikkay@google.com | 60b2dfc | 2008-08-16 03:00:48 +0900 | [diff] [blame] | 51 | bool is_sep = (path.length() > 0 && |
| 52 | (path)[path.length() - 1] == kPathSeparator); |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 53 | return is_sep; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | void TrimTrailingSeparator(std::wstring* dir) { |
erikkay@google.com | dfb51b2 | 2008-08-16 02:32:10 +0900 | [diff] [blame] | 57 | while (dir->length() > 1 && EndsWithSeparator(dir)) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 58 | dir->resize(dir->length() - 1); |
| 59 | } |
| 60 | |
| 61 | void UpOneDirectory(std::wstring* dir) { |
| 62 | TrimTrailingSeparator(dir); |
| 63 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 64 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 65 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 66 | dir->resize(last_sep); |
| 67 | } |
| 68 | |
| 69 | void UpOneDirectoryOrEmpty(std::wstring* dir) { |
| 70 | TrimTrailingSeparator(dir); |
| 71 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 72 | std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator); |
| 73 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 74 | dir->resize(last_sep); |
| 75 | else |
| 76 | dir->clear(); |
| 77 | } |
| 78 | |
| 79 | void TrimFilename(std::wstring* path) { |
| 80 | if (EndsWithSeparator(path)) { |
| 81 | TrimTrailingSeparator(path); |
| 82 | } else { |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 83 | std::wstring::size_type last_sep = path->find_last_of(kPathSeparator); |
| 84 | if (last_sep != std::wstring::npos) |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 85 | path->resize(last_sep); |
| 86 | } |
| 87 | } |
| 88 | |
// Returns the portion of |path| that follows the last '\' or '/'. If |path|
// contains neither character, the whole of |path| is returned.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but win unit test
  // are exercising '/' as a path separator as well.
  const std::wstring::size_type last_slash = path.find_last_of(L"\\/");
  if (last_slash == std::wstring::npos)
    return path;
  return path.substr(last_slash + 1);
}
| 95 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 96 | std::wstring GetFileExtensionFromPath(const std::wstring& path) { |
| 97 | std::wstring file_name = GetFilenameFromPath(path); |
| 98 | std::wstring::size_type last_dot = file_name.rfind(L'.'); |
| 99 | return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | void AppendToPath(std::wstring* path, const std::wstring& new_ending) { |
| 103 | if (!path) { |
| 104 | NOTREACHED(); |
| 105 | return; // Don't crash in this function in release builds. |
| 106 | } |
| 107 | |
| 108 | if (!EndsWithSeparator(path)) |
| 109 | path->push_back(kPathSeparator); |
| 110 | path->append(new_ending); |
| 111 | } |
| 112 | |
| 113 | void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) { |
| 114 | DCHECK(path); |
| 115 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 116 | const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator); |
| 117 | const std::wstring::size_type last_sep = path->rfind(kPathSeparator); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 118 | |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 119 | if (last_dot == std::wstring::npos || |
| 120 | (last_sep != std::wstring::npos && last_dot < last_sep)) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 121 | // The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo". |
| 122 | // We should just append the suffix to the entire path. |
| 123 | path->append(suffix); |
| 124 | return; |
| 125 | } |
| 126 | |
| 127 | path->insert(last_dot, suffix); |
| 128 | } |
| 129 | |
// Sanitizes |*file_name| in place: leading and trailing whitespace is
// trimmed, then every code point found in the "illegal" set built below is
// replaced with |replace_char|.
//
// |replace_char| is DCHECKed to be outside the illegal set and below 0x10000.
void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
  DCHECK(file_name);

  // Control characters, formatting characters, non-characters, and
  // some printable ASCII characters regarded as dangerous ('"*/:<>?\\|').
  // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
  // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
  // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
  // are legitimate in Arabic and some S/SE Asian scripts. However, when used
  // elsewhere, they can be confusing/problematic.
  // Also, consider wrapping the set with our Singleton class to create and
  // freeze it only once. Note that there's a trade-off between memory and
  // speed.

  UErrorCode status = U_ZERO_ERROR;
  // The same set pattern is supplied two ways depending on the width of
  // wchar_t: directly as a wide literal, or as an escaped narrow literal that
  // is unescape()d.
#if defined(WCHAR_T_IS_UTF16)
  UnicodeSet illegal_characters(UnicodeString(
      L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
#else
  UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
      "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
#endif
  DCHECK(U_SUCCESS(status));
  // Add non-characters. If this becomes a performance bottleneck by
  // any chance, check |(ucs4 & 0xFFFEu) == 0xFFFEu|, instead.
  illegal_characters.add(0xFDD0, 0xFDEF);
  // U+xxFFFE and U+xxFFFF for each of the 17 planes (0x00 through 0x10).
  for (int i = 0; i <= 0x10; ++i) {
    int plane_base = 0x10000 * i;
    illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
  }
  illegal_characters.freeze();
  DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);

  // Remove leading and trailing whitespace.
  TrimWhitespace(*file_name, TRIM_ALL, file_name);

  std::wstring::size_type i = 0;
  std::wstring::size_type length = file_name->size();
  const wchar_t* wstr = file_name->data();
#if defined(WCHAR_T_IS_UTF16)
  // Using |span| method of UnicodeSet might speed things up a bit, but
  // it's not likely to matter here.
  // The result is built in |temp| because an illegal code point read from two
  // code units is replaced by the single |replace_char| unit, so the output
  // can be shorter than the input.
  std::wstring temp;
  temp.reserve(length);
  while (i < length) {
    UChar32 ucs4;
    std::wstring::size_type prev = i;
    // Decodes one code point starting at |i| and advances |i| past it.
    U16_NEXT(wstr, i, length, ucs4);
    if (illegal_characters.contains(ucs4)) {
      temp.push_back(replace_char);
    } else if (ucs4 < 0x10000) {
      temp.push_back(ucs4);
    } else {
      // Code point at or above 0x10000: copy its two original code units.
      temp.push_back(wstr[prev]);
      temp.push_back(wstr[prev + 1]);
    }
  }
  file_name->swap(temp);
#elif defined(WCHAR_T_IS_UTF32)
  // Each wchar_t is tested on its own, so replacement is done in place.
  while (i < length) {
    if (illegal_characters.contains(wstr[i])) {
      (*file_name)[i] = replace_char;
    }
    ++i;
  }
#else
#error wchar_t* should be either UTF-16 or UTF-32
#endif
}
| 199 | |
// Internal helper for ReplaceExtension: appends |extension| to |*file|,
// inserting a '.' in front of it when |extension| does not begin with one.
// Nothing is appended when |extension| is empty or is just ".".
static void AppendExtension(const std::wstring& extension,
                            std::wstring* file) {
  if (extension.empty() || extension == L".")
    return;

  const bool has_leading_dot = (extension[0] == L'.');
  if (!has_leading_dot)
    file->append(L".");
  file->append(extension);
}
| 211 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 212 | void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) { |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 213 | const std::wstring::size_type last_dot = file_name->rfind(L'.'); |
sky@google.com | 71e7c6f | 2008-09-20 02:32:18 +0900 | [diff] [blame] | 214 | if (last_dot == std::wstring::npos) { |
| 215 | // No extension, just append the supplied extension. |
| 216 | AppendExtension(extension, file_name); |
| 217 | return; |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 218 | } |
sky@google.com | 71e7c6f | 2008-09-20 02:32:18 +0900 | [diff] [blame] | 219 | const std::wstring::size_type last_separator = |
| 220 | file_name->rfind(kPathSeparator); |
| 221 | if (last_separator != std::wstring::npos && last_dot < last_separator) { |
| 222 | // File name doesn't have extension, but one of the directories does; don't |
| 223 | // replace it, just append the supplied extension. For example |
| 224 | // 'c:\tmp.bar\foo'. |
| 225 | AppendExtension(extension, file_name); |
| 226 | return; |
| 227 | } |
| 228 | std::wstring result = file_name->substr(0, last_dot); |
| 229 | AppendExtension(extension, &result); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 230 | file_name->swap(result); |
| 231 | } |
| 232 | |
evanm@google.com | 874d167 | 2008-10-31 08:54:04 +0900 | [diff] [blame] | 233 | bool ContentsEqual(const FilePath& filename1, const FilePath& filename2) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 234 | // We open the file in binary format even if they are text files because |
| 235 | // we are just comparing that bytes are exactly same in both files and not |
| 236 | // doing anything smart with text formatting. |
evanm@google.com | 874d167 | 2008-10-31 08:54:04 +0900 | [diff] [blame] | 237 | std::ifstream file1(filename1.value().c_str(), |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 238 | std::ios::in | std::ios::binary); |
evanm@google.com | 874d167 | 2008-10-31 08:54:04 +0900 | [diff] [blame] | 239 | std::ifstream file2(filename2.value().c_str(), |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 240 | std::ios::in | std::ios::binary); |
erikkay@google.com | 9fc57d0 | 2008-08-09 05:16:08 +0900 | [diff] [blame] | 241 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 242 | // Even if both files aren't openable (and thus, in some sense, "equal"), |
| 243 | // any unusable file yields a result of "false". |
| 244 | if (!file1.is_open() || !file2.is_open()) |
| 245 | return false; |
| 246 | |
| 247 | const int BUFFER_SIZE = 2056; |
| 248 | char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE]; |
| 249 | do { |
| 250 | file1.read(buffer1, BUFFER_SIZE); |
| 251 | file2.read(buffer2, BUFFER_SIZE); |
| 252 | |
| 253 | if ((file1.eof() && !file2.eof()) || |
| 254 | (!file1.eof() && file2.eof()) || |
| 255 | (file1.gcount() != file2.gcount()) || |
| 256 | (memcmp(buffer1, buffer2, file1.gcount()))) { |
| 257 | file1.close(); |
| 258 | file2.close(); |
| 259 | return false; |
| 260 | } |
| 261 | } while (!file1.eof() && !file2.eof()); |
| 262 | |
| 263 | file1.close(); |
| 264 | file2.close(); |
| 265 | return true; |
| 266 | } |
| 267 | |
| 268 | bool ReadFileToString(const std::wstring& path, std::string* contents) { |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame] | 269 | FILE* file = OpenFile(path, "rb"); |
| 270 | if (!file) { |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 271 | return false; |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame] | 272 | } |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 273 | |
| 274 | char buf[1 << 16]; |
| 275 | size_t len; |
| 276 | while ((len = fread(buf, 1, sizeof(buf), file)) > 0) { |
| 277 | contents->append(buf, len); |
| 278 | } |
mark@chromium.org | d1bafc6 | 2008-10-02 02:40:13 +0900 | [diff] [blame] | 279 | CloseFile(file); |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 280 | |
| 281 | return true; |
| 282 | } |
| 283 | |
darin@google.com | 7f479f2 | 2008-09-26 10:04:08 +0900 | [diff] [blame] | 284 | bool GetFileSize(const std::wstring& file_path, int64* file_size) { |
| 285 | FileInfo info; |
| 286 | if (!GetFileInfo(file_path, &info)) |
| 287 | return false; |
| 288 | *file_size = info.size; |
| 289 | return true; |
| 290 | } |
| 291 | |
// Closes |file| with fclose(). A NULL |file| is treated as already closed and
// reports success; otherwise the result reflects whether fclose() succeeded.
bool CloseFile(FILE* file) {
  return file == NULL || fclose(file) == 0;
}
| 297 | |
// Deprecated functions ----------------------------------------------------
| 299 | |
| 300 | bool AbsolutePath(std::wstring* path_str) { |
evanm@google.com | 104f22e | 2008-10-31 11:03:07 +0900 | [diff] [blame^] | 301 | FilePath path(FilePath::FromWStringHack(*path_str)); |
evanm@google.com | 874d167 | 2008-10-31 08:54:04 +0900 | [diff] [blame] | 302 | if (!AbsolutePath(&path)) |
| 303 | return false; |
| 304 | *path_str = path.ToWStringHack(); |
| 305 | return true; |
| 306 | } |
| 307 | bool Delete(const std::wstring& path, bool recursive) { |
| 308 | return Delete(FilePath::FromWStringHack(path), recursive); |
| 309 | } |
| 310 | bool Move(const std::wstring& from_path, const std::wstring& to_path) { |
| 311 | return Move(FilePath::FromWStringHack(from_path), |
| 312 | FilePath::FromWStringHack(to_path)); |
| 313 | } |
| 314 | bool CopyFile(const std::wstring& from_path, const std::wstring& to_path) { |
| 315 | return CopyFile(FilePath::FromWStringHack(from_path), |
| 316 | FilePath::FromWStringHack(to_path)); |
| 317 | } |
| 318 | bool CopyDirectory(const std::wstring& from_path, const std::wstring& to_path, |
| 319 | bool recursive) { |
| 320 | return CopyDirectory(FilePath::FromWStringHack(from_path), |
| 321 | FilePath::FromWStringHack(to_path), |
| 322 | recursive); |
| 323 | } |
| 324 | bool PathExists(const std::wstring& path) { |
| 325 | return PathExists(FilePath::FromWStringHack(path)); |
| 326 | } |
| 327 | bool DirectoryExists(const std::wstring& path) { |
| 328 | return DirectoryExists(FilePath::FromWStringHack(path)); |
| 329 | } |
| 330 | bool ContentsEqual(const std::wstring& filename1, |
| 331 | const std::wstring& filename2) { |
| 332 | return ContentsEqual(FilePath::FromWStringHack(filename1), |
| 333 | FilePath::FromWStringHack(filename2)); |
| 334 | } |
| 335 | bool CreateDirectory(const std::wstring& full_path) { |
| 336 | return CreateDirectory(FilePath::FromWStringHack(full_path)); |
| 337 | } |
| 338 | bool GetCurrentDirectory(std::wstring* path_str) { |
| 339 | FilePath path; |
| 340 | if (!GetCurrentDirectory(&path)) |
| 341 | return false; |
| 342 | *path_str = path.ToWStringHack(); |
| 343 | return true; |
| 344 | } |
| 345 | bool GetTempDir(std::wstring* path_str) { |
| 346 | FilePath path; |
| 347 | if (!GetTempDir(&path)) |
| 348 | return false; |
| 349 | *path_str = path.ToWStringHack(); |
| 350 | return true; |
| 351 | } |
| 352 | |
initial.commit | 3f4a732 | 2008-07-27 06:49:38 +0900 | [diff] [blame] | 353 | } // namespace |
license.bot | f003cfe | 2008-08-24 09:55:55 +0900 | [diff] [blame] | 354 | |