blob: ade8db1272fdfae1855f1d44c876168f3bf5dddb [file] [log] [blame]
license.botf003cfe2008-08-24 09:55:55 +09001// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
initial.commit3f4a7322008-07-27 06:49:38 +09004
5#include "base/file_util.h"
6
mark@chromium.orgd1bafc62008-10-02 02:40:13 +09007#include <stdio.h>
8
initial.commit3f4a7322008-07-27 06:49:38 +09009#include <fstream>
initial.commit3f4a7322008-07-27 06:49:38 +090010
evanm@google.com874d1672008-10-31 08:54:04 +090011#include "base/file_path.h"
initial.commit3f4a7322008-07-27 06:49:38 +090012#include "base/logging.h"
initial.commit3f4a7322008-07-27 06:49:38 +090013#include "base/string_util.h"
initial.commit3f4a7322008-07-27 06:49:38 +090014#include "unicode/uniset.h"
15
initial.commit3f4a7322008-07-27 06:49:38 +090016namespace file_util {
17
// Character that separates a file's base name from its extension.
const wchar_t kExtensionSeparator = L'.';
19
erikkay@google.comdfb51b22008-08-16 02:32:10 +090020void PathComponents(const std::wstring& path,
21 std::vector<std::wstring>* components) {
22 DCHECK(components != NULL);
23 if (components == NULL)
24 return;
25 std::wstring::size_type start = 0;
26 std::wstring::size_type end = path.find(kPathSeparator, start);
27
28 // Special case the "/" or "\" directory. On Windows with a drive letter,
29 // this code path won't hit, but the right thing should still happen.
30 // "E:\foo" will turn into "E:","foo".
31 if (end == start) {
32 components->push_back(std::wstring(path, 0, 1));
33 start = end + 1;
34 end = path.find(kPathSeparator, start);
35 }
36 while (end != std::wstring::npos) {
37 std::wstring component = std::wstring(path, start, end - start);
38 components->push_back(component);
39 start = end + 1;
40 end = path.find(kPathSeparator, start);
41 }
42 std::wstring component = std::wstring(path, start);
43 components->push_back(component);
44}
45
initial.commit3f4a7322008-07-27 06:49:38 +090046bool EndsWithSeparator(std::wstring* path) {
erikkay@google.comdfb51b22008-08-16 02:32:10 +090047 return EndsWithSeparator(*path);
48}
49
50bool EndsWithSeparator(const std::wstring& path) {
erikkay@google.com60b2dfc2008-08-16 03:00:48 +090051 bool is_sep = (path.length() > 0 &&
52 (path)[path.length() - 1] == kPathSeparator);
erikkay@google.comdfb51b22008-08-16 02:32:10 +090053 return is_sep;
initial.commit3f4a7322008-07-27 06:49:38 +090054}
55
56void TrimTrailingSeparator(std::wstring* dir) {
erikkay@google.comdfb51b22008-08-16 02:32:10 +090057 while (dir->length() > 1 && EndsWithSeparator(dir))
initial.commit3f4a7322008-07-27 06:49:38 +090058 dir->resize(dir->length() - 1);
59}
60
61void UpOneDirectory(std::wstring* dir) {
62 TrimTrailingSeparator(dir);
63
erikkay@google.com9fc57d02008-08-09 05:16:08 +090064 std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
65 if (last_sep != std::wstring::npos)
initial.commit3f4a7322008-07-27 06:49:38 +090066 dir->resize(last_sep);
67}
68
69void UpOneDirectoryOrEmpty(std::wstring* dir) {
70 TrimTrailingSeparator(dir);
71
erikkay@google.com9fc57d02008-08-09 05:16:08 +090072 std::wstring::size_type last_sep = dir->find_last_of(kPathSeparator);
73 if (last_sep != std::wstring::npos)
initial.commit3f4a7322008-07-27 06:49:38 +090074 dir->resize(last_sep);
75 else
76 dir->clear();
77}
78
79void TrimFilename(std::wstring* path) {
80 if (EndsWithSeparator(path)) {
81 TrimTrailingSeparator(path);
82 } else {
erikkay@google.com9fc57d02008-08-09 05:16:08 +090083 std::wstring::size_type last_sep = path->find_last_of(kPathSeparator);
84 if (last_sep != std::wstring::npos)
initial.commit3f4a7322008-07-27 06:49:38 +090085 path->resize(last_sep);
86 }
87}
88
// Returns everything after the last '\' or '/' in |path|, or all of |path|
// when it contains neither character.
std::wstring GetFilenameFromPath(const std::wstring& path) {
  // TODO(erikkay): fix this - it's not using kPathSeparator, but win unit test
  // are exercising '/' as a path separator as well.
  const std::wstring::size_type last_sep = path.find_last_of(L"\\/");
  if (last_sep == std::wstring::npos)
    return path;
  return path.substr(last_sep + 1);
}
95
erikkay@google.com9fc57d02008-08-09 05:16:08 +090096std::wstring GetFileExtensionFromPath(const std::wstring& path) {
97 std::wstring file_name = GetFilenameFromPath(path);
98 std::wstring::size_type last_dot = file_name.rfind(L'.');
99 return std::wstring(last_dot == std::wstring::npos? L"" : file_name, last_dot+1);
initial.commit3f4a7322008-07-27 06:49:38 +0900100}
101
102void AppendToPath(std::wstring* path, const std::wstring& new_ending) {
103 if (!path) {
104 NOTREACHED();
105 return; // Don't crash in this function in release builds.
106 }
107
108 if (!EndsWithSeparator(path))
109 path->push_back(kPathSeparator);
110 path->append(new_ending);
111}
112
113void InsertBeforeExtension(std::wstring* path, const std::wstring& suffix) {
114 DCHECK(path);
115
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900116 const std::wstring::size_type last_dot = path->rfind(kExtensionSeparator);
117 const std::wstring::size_type last_sep = path->rfind(kPathSeparator);
initial.commit3f4a7322008-07-27 06:49:38 +0900118
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900119 if (last_dot == std::wstring::npos ||
120 (last_sep != std::wstring::npos && last_dot < last_sep)) {
initial.commit3f4a7322008-07-27 06:49:38 +0900121 // The path looks something like "C:\pics.old\jojo" or "C:\pics\jojo".
122 // We should just append the suffix to the entire path.
123 path->append(suffix);
124 return;
125 }
126
127 path->insert(last_dot, suffix);
128}
129
130void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
131 DCHECK(file_name);
132
ericroman@google.comdbff4f52008-08-19 01:00:38 +0900133 // Control characters, formatting characters, non-characters, and
initial.commit3f4a7322008-07-27 06:49:38 +0900134 // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
135 // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
136 // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
137 // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
138 // are legitimate in Arabic and some S/SE Asian scripts. However, when used
139 // elsewhere, they can be confusing/problematic.
140 // Also, consider wrapping the set with our Singleton class to create and
141 // freeze it only once. Note that there's a trade-off between memory and
142 // speed.
143
144 UErrorCode status = U_ZERO_ERROR;
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900145#if defined(WCHAR_T_IS_UTF16)
initial.commit3f4a7322008-07-27 06:49:38 +0900146 UnicodeSet illegal_characters(UnicodeString(
147 L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
148#else
149 UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
150 "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
151#endif
152 DCHECK(U_SUCCESS(status));
153 // Add non-characters. If this becomes a performance bottleneck by
154 // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead.
155 illegal_characters.add(0xFDD0, 0xFDEF);
156 for (int i = 0; i <= 0x10; ++i) {
157 int plane_base = 0x10000 * i;
158 illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
159 }
160 illegal_characters.freeze();
161 DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
162
163 // Remove leading and trailing whitespace.
164 TrimWhitespace(*file_name, TRIM_ALL, file_name);
165
166 std::wstring::size_type i = 0;
167 std::wstring::size_type length = file_name->size();
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900168 const wchar_t* wstr = file_name->data();
169#if defined(WCHAR_T_IS_UTF16)
initial.commit3f4a7322008-07-27 06:49:38 +0900170 // Using |span| method of UnicodeSet might speed things up a bit, but
171 // it's not likely to matter here.
initial.commit3f4a7322008-07-27 06:49:38 +0900172 std::wstring temp;
173 temp.reserve(length);
174 while (i < length) {
175 UChar32 ucs4;
176 std::wstring::size_type prev = i;
177 U16_NEXT(wstr, i, length, ucs4);
178 if (illegal_characters.contains(ucs4)) {
179 temp.push_back(replace_char);
180 } else if (ucs4 < 0x10000) {
181 temp.push_back(ucs4);
182 } else {
183 temp.push_back(wstr[prev]);
184 temp.push_back(wstr[prev + 1]);
185 }
186 }
187 file_name->swap(temp);
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900188#elif defined(WCHAR_T_IS_UTF32)
initial.commit3f4a7322008-07-27 06:49:38 +0900189 while (i < length) {
190 if (illegal_characters.contains(wstr[i])) {
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900191 (*file_name)[i] = replace_char;
initial.commit3f4a7322008-07-27 06:49:38 +0900192 }
erikkay@google.comdfb51b22008-08-16 02:32:10 +0900193 ++i;
initial.commit3f4a7322008-07-27 06:49:38 +0900194 }
195#else
196#error wchar_t* should be either UTF-16 or UTF-32
197#endif
198}
199
// Appends |extension| to |*file|, inserting a '.' first when |extension| does
// not already begin with one. An |extension| that is empty or exactly "."
// leaves |*file| unchanged. This is used internally by ReplaceExtension.
static void AppendExtension(const std::wstring& extension,
                            std::wstring* file) {
  if (extension.empty() || extension == L".")
    return;

  if (extension[0] != L'.')
    file->push_back(L'.');
  file->append(extension);
}
211
initial.commit3f4a7322008-07-27 06:49:38 +0900212void ReplaceExtension(std::wstring* file_name, const std::wstring& extension) {
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900213 const std::wstring::size_type last_dot = file_name->rfind(L'.');
sky@google.com71e7c6f2008-09-20 02:32:18 +0900214 if (last_dot == std::wstring::npos) {
215 // No extension, just append the supplied extension.
216 AppendExtension(extension, file_name);
217 return;
initial.commit3f4a7322008-07-27 06:49:38 +0900218 }
sky@google.com71e7c6f2008-09-20 02:32:18 +0900219 const std::wstring::size_type last_separator =
220 file_name->rfind(kPathSeparator);
221 if (last_separator != std::wstring::npos && last_dot < last_separator) {
222 // File name doesn't have extension, but one of the directories does; don't
223 // replace it, just append the supplied extension. For example
224 // 'c:\tmp.bar\foo'.
225 AppendExtension(extension, file_name);
226 return;
227 }
228 std::wstring result = file_name->substr(0, last_dot);
229 AppendExtension(extension, &result);
initial.commit3f4a7322008-07-27 06:49:38 +0900230 file_name->swap(result);
231}
232
evanm@google.com874d1672008-10-31 08:54:04 +0900233bool ContentsEqual(const FilePath& filename1, const FilePath& filename2) {
initial.commit3f4a7322008-07-27 06:49:38 +0900234 // We open the file in binary format even if they are text files because
235 // we are just comparing that bytes are exactly same in both files and not
236 // doing anything smart with text formatting.
evanm@google.com874d1672008-10-31 08:54:04 +0900237 std::ifstream file1(filename1.value().c_str(),
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900238 std::ios::in | std::ios::binary);
evanm@google.com874d1672008-10-31 08:54:04 +0900239 std::ifstream file2(filename2.value().c_str(),
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900240 std::ios::in | std::ios::binary);
erikkay@google.com9fc57d02008-08-09 05:16:08 +0900241
initial.commit3f4a7322008-07-27 06:49:38 +0900242 // Even if both files aren't openable (and thus, in some sense, "equal"),
243 // any unusable file yields a result of "false".
244 if (!file1.is_open() || !file2.is_open())
245 return false;
246
247 const int BUFFER_SIZE = 2056;
248 char buffer1[BUFFER_SIZE], buffer2[BUFFER_SIZE];
249 do {
250 file1.read(buffer1, BUFFER_SIZE);
251 file2.read(buffer2, BUFFER_SIZE);
252
253 if ((file1.eof() && !file2.eof()) ||
254 (!file1.eof() && file2.eof()) ||
255 (file1.gcount() != file2.gcount()) ||
256 (memcmp(buffer1, buffer2, file1.gcount()))) {
257 file1.close();
258 file2.close();
259 return false;
260 }
261 } while (!file1.eof() && !file2.eof());
262
263 file1.close();
264 file2.close();
265 return true;
266}
267
268bool ReadFileToString(const std::wstring& path, std::string* contents) {
mark@chromium.orgd1bafc62008-10-02 02:40:13 +0900269 FILE* file = OpenFile(path, "rb");
270 if (!file) {
initial.commit3f4a7322008-07-27 06:49:38 +0900271 return false;
mark@chromium.orgd1bafc62008-10-02 02:40:13 +0900272 }
initial.commit3f4a7322008-07-27 06:49:38 +0900273
274 char buf[1 << 16];
275 size_t len;
276 while ((len = fread(buf, 1, sizeof(buf), file)) > 0) {
277 contents->append(buf, len);
278 }
mark@chromium.orgd1bafc62008-10-02 02:40:13 +0900279 CloseFile(file);
initial.commit3f4a7322008-07-27 06:49:38 +0900280
281 return true;
282}
283
darin@google.com7f479f22008-09-26 10:04:08 +0900284bool GetFileSize(const std::wstring& file_path, int64* file_size) {
285 FileInfo info;
286 if (!GetFileInfo(file_path, &info))
287 return false;
288 *file_size = info.size;
289 return true;
290}
291
// Closes |file| with fclose(). A NULL |file| is treated as already closed and
// reports success; otherwise the result is whether fclose() returned 0.
bool CloseFile(FILE* file) {
  return file == NULL || fclose(file) == 0;
}
297
evanm@google.com874d1672008-10-31 08:54:04 +0900298// Deprecated functions ----------------------------------------------------
299
300bool AbsolutePath(std::wstring* path_str) {
evanm@google.com104f22e2008-10-31 11:03:07 +0900301 FilePath path(FilePath::FromWStringHack(*path_str));
evanm@google.com874d1672008-10-31 08:54:04 +0900302 if (!AbsolutePath(&path))
303 return false;
304 *path_str = path.ToWStringHack();
305 return true;
306}
307bool Delete(const std::wstring& path, bool recursive) {
308 return Delete(FilePath::FromWStringHack(path), recursive);
309}
310bool Move(const std::wstring& from_path, const std::wstring& to_path) {
311 return Move(FilePath::FromWStringHack(from_path),
312 FilePath::FromWStringHack(to_path));
313}
314bool CopyFile(const std::wstring& from_path, const std::wstring& to_path) {
315 return CopyFile(FilePath::FromWStringHack(from_path),
316 FilePath::FromWStringHack(to_path));
317}
318bool CopyDirectory(const std::wstring& from_path, const std::wstring& to_path,
319 bool recursive) {
320 return CopyDirectory(FilePath::FromWStringHack(from_path),
321 FilePath::FromWStringHack(to_path),
322 recursive);
323}
324bool PathExists(const std::wstring& path) {
325 return PathExists(FilePath::FromWStringHack(path));
326}
327bool DirectoryExists(const std::wstring& path) {
328 return DirectoryExists(FilePath::FromWStringHack(path));
329}
330bool ContentsEqual(const std::wstring& filename1,
331 const std::wstring& filename2) {
332 return ContentsEqual(FilePath::FromWStringHack(filename1),
333 FilePath::FromWStringHack(filename2));
334}
335bool CreateDirectory(const std::wstring& full_path) {
336 return CreateDirectory(FilePath::FromWStringHack(full_path));
337}
338bool GetCurrentDirectory(std::wstring* path_str) {
339 FilePath path;
340 if (!GetCurrentDirectory(&path))
341 return false;
342 *path_str = path.ToWStringHack();
343 return true;
344}
345bool GetTempDir(std::wstring* path_str) {
346 FilePath path;
347 if (!GetTempDir(&path))
348 return false;
349 *path_str = path.ToWStringHack();
350 return true;
351}
352
initial.commit3f4a7322008-07-27 06:49:38 +0900353} // namespace
license.botf003cfe2008-08-24 09:55:55 +0900354