Add FilePath::FromUTF8Unsafe() and FilePath::AsUTF8Unsafe().
The logic is moved from value_conversions.cc.
FilePath::FromUTF8Unsafe() should only be used when you are
sure that the input string is UTF-8. See the function comments
for why they have "Unsafe" in their names.
BUG=none
TEST=base_unittests
Review URL: http://codereview.chromium.org/8402008
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@108246 0039d316-1c4b-4281-b951-d872f2087c98
CrOS-Libchrome-Original-Commit: 45440519a239ffad82425e08115d909fd82a1e9d
diff --git a/base/file_path.cc b/base/file_path.cc
index 35361be..3666ff2 100644
--- a/base/file_path.cc
+++ b/base/file_path.cc
@@ -527,6 +527,14 @@
return "";
}
+std::string FilePath::AsUTF8Unsafe() const {
+#if defined(OS_MACOSX) || defined(OS_CHROMEOS)
+ return value();
+#else
+ return WideToUTF8(base::SysNativeMBToWide(value()));
+#endif
+}
+
// The *Hack functions are temporary while we fix the remainder of the code.
// Remember to remove the #includes at the top when you remove these.
@@ -534,6 +542,16 @@
FilePath FilePath::FromWStringHack(const std::wstring& wstring) {
return FilePath(base::SysWideToNativeMB(wstring));
}
+
+// static
+FilePath FilePath::FromUTF8Unsafe(const std::string& utf8) {
+#if defined(OS_MACOSX) || defined(OS_CHROMEOS)
+ return FilePath(utf8);
+#else
+ return FilePath(base::SysWideToNativeMB(UTF8ToWide(utf8)));
+#endif
+}
+
#elif defined(OS_WIN)
string16 FilePath::LossyDisplayName() const {
return path_;
@@ -545,10 +563,19 @@
return "";
}
+std::string FilePath::AsUTF8Unsafe() const {
+ return WideToUTF8(value());
+}
+
// static
FilePath FilePath::FromWStringHack(const std::wstring& wstring) {
return FilePath(wstring);
}
+
+// static
+FilePath FilePath::FromUTF8Unsafe(const std::string& utf8) {
+ return FilePath(UTF8ToWide(utf8));
+}
#endif
void FilePath::WriteToPickle(Pickle* pickle) {
diff --git a/base/file_path.h b/base/file_path.h
index 6396833..8342c30 100644
--- a/base/file_path.h
+++ b/base/file_path.h
@@ -17,6 +17,7 @@
//
// * The encoding need not be specified on POSIX systems, although some
// POSIX-compliant systems do specify an encoding. Mac OS X uses UTF-8.
+// Chrome OS also uses UTF-8.
// Linux does not specify an encoding, but in practice, the locale's
// character set may be used.
//
@@ -294,6 +295,21 @@
// known-ASCII filename.
std::string MaybeAsASCII() const;
+ // Returns the path as UTF-8.
+ //
+ // This function is *unsafe* as there is no way to tell what encoding is
+ // used in file names on POSIX systems other than Mac and Chrome OS,
+ // although UTF-8 is practically used everywhere these days. To mitigate
+ // the encoding issue, this function internally calls
+ // SysNativeMBToWide() on POSIX systems other than Mac and Chrome OS,
+ // on the assumption that the current locale's encoding is used in file
+ // names, but this isn't a perfect solution.
+ //
+ // Once it becomes safe to stop caring about non-UTF-8 file names,
+ // the SysNativeMBToWide() hack will be removed from the code, along
+ // with "Unsafe" in the function name.
+ std::string AsUTF8Unsafe() const;
+
// Older Chromium code assumes that paths are always wstrings.
// This function converts wstrings to FilePaths, and is
// useful to smooth porting that old code to the FilePath API.
@@ -312,6 +328,16 @@
// ever use the result of that again as a path.
static FilePath FromWStringHack(const std::wstring& wstring);
+ // Returns a FilePath object from a path name in UTF-8. This function
+ // should only be used for cases where you are sure that the input
+ // string is UTF-8.
+ //
+ // Like AsUTF8Unsafe(), this function is unsafe. This function
+ // internally calls SysWideToNativeMB() on POSIX systems other than Mac
+ // and Chrome OS, to mitigate the encoding issue. See the comment at
+ // AsUTF8Unsafe() for details.
+ static FilePath FromUTF8Unsafe(const std::string& utf8);
+
void WriteToPickle(Pickle* pickle);
bool ReadFromPickle(Pickle* pickle, void** iter);
diff --git a/base/file_path_unittest.cc b/base/file_path_unittest.cc
index 6eb106b..9890e80 100644
--- a/base/file_path_unittest.cc
+++ b/base/file_path_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -37,6 +37,11 @@
int expected;
};
+struct UTF8TestData {
+ const FilePath::CharType* native;
+ const char* utf8;
+};
+
// file_util winds up using autoreleased objects on the Mac, so this needs
// to be a PlatformTest
class FilePathTest : public PlatformTest {
@@ -1044,6 +1049,30 @@
}
}
+TEST_F(FilePathTest, FromUTF8Unsafe_And_AsUTF8Unsafe) {
+ const struct UTF8TestData cases[] = {
+ { FPL("foo.txt"), "foo.txt" },
+ // "aeo" with accents. Use http://0xcc.net/jsescape/ to decode them.
+ { FPL("\u00E0\u00E8\u00F2.txt"), "\xC3\xA0\xC3\xA8\xC3\xB2.txt" },
+ // Full-width "ABC".
+ { FPL("\uFF21\uFF22\uFF23.txt"),
+ "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3.txt" },
+ };
+
+ for (size_t i = 0; i < arraysize(cases); ++i) {
+ // Test FromUTF8Unsafe() works.
+ FilePath from_utf8 = FilePath::FromUTF8Unsafe(cases[i].utf8);
+ EXPECT_EQ(cases[i].native, from_utf8.value())
+ << "i: " << i << ", input: " << cases[i].native;
+ // Test AsUTF8Unsafe() works.
+ FilePath from_native = FilePath(cases[i].native);
+ EXPECT_EQ(cases[i].utf8, from_native.AsUTF8Unsafe())
+ << "i: " << i << ", input: " << cases[i].native;
+ // Test the two file paths are identical.
+ EXPECT_EQ(from_utf8.value(), from_native.value());
+ }
+}
+
#if defined(FILE_PATH_USES_WIN_SEPARATORS)
TEST_F(FilePathTest, NormalizeWindowsPathSeparators) {
const struct UnaryTestData cases[] = {
@@ -1086,4 +1115,5 @@
"i: " << i << ", input: " << input.value();
}
}
+
#endif
diff --git a/base/value_conversions.cc b/base/value_conversions.cc
index 64513d0..f4957a4 100644
--- a/base/value_conversions.cc
+++ b/base/value_conversions.cc
@@ -5,39 +5,14 @@
#include "base/value_conversions.h"
#include "base/file_path.h"
-#include "base/sys_string_conversions.h"
-#include "base/utf_string_conversions.h"
#include "base/values.h"
namespace base {
-namespace {
-
// |Value| internally stores strings in UTF-8, so we have to convert from the
// system native code to UTF-8 and back.
-
-std::string FilePathToUTF8(const FilePath& file_path) {
-#if defined(OS_POSIX)
- return WideToUTF8(SysNativeMBToWide(file_path.value()));
-#else
- return UTF16ToUTF8(file_path.value());
-#endif
-}
-
-FilePath UTF8ToFilePath(const std::string& str) {
- FilePath::StringType result;
-#if defined(OS_POSIX)
- result = SysWideToNativeMB(UTF8ToWide(str));
-#elif defined(OS_WIN)
- result = UTF8ToUTF16(str);
-#endif
- return FilePath(result);
-}
-
-} // namespace
-
StringValue* CreateFilePathValue(const FilePath& in_value) {
- return new StringValue(FilePathToUTF8(in_value));
+ return new StringValue(in_value.AsUTF8Unsafe());
}
bool GetValueAsFilePath(const Value& value, FilePath* file_path) {
@@ -45,7 +20,7 @@
if (!value.GetAsString(&str))
return false;
if (file_path)
- *file_path = UTF8ToFilePath(str);
+ *file_path = FilePath::FromUTF8Unsafe(str);
return true;
}