Add FilePath::FromUTF8Unsafe() and FilePath::AsUTF8Unsafe().

The logic is moved from value_conversions.cc.
FilePath::FromUTF8Unsafe() should only be used when you are
sure that the input string is UTF-8. See the function comments
for why they have "Unsafe" in their names.

BUG=none
TEST=base_unittests

Review URL: http://codereview.chromium.org/8402008

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@108246 0039d316-1c4b-4281-b951-d872f2087c98


CrOS-Libchrome-Original-Commit: 45440519a239ffad82425e08115d909fd82a1e9d
diff --git a/base/file_path.cc b/base/file_path.cc
index 35361be..3666ff2 100644
--- a/base/file_path.cc
+++ b/base/file_path.cc
@@ -527,6 +527,14 @@
   return "";
 }
 
+std::string FilePath::AsUTF8Unsafe() const {
+#if defined(OS_MACOSX) || defined(OS_CHROMEOS)
+  return value();  // Mac OS X and Chrome OS paths are UTF-8; return as-is.
+#else  // Elsewhere: native multibyte string -> wide -> UTF-8.
+  return WideToUTF8(base::SysNativeMBToWide(value()));
+#endif
+}
+
 // The *Hack functions are temporary while we fix the remainder of the code.
 // Remember to remove the #includes at the top when you remove these.
 
@@ -534,6 +542,16 @@
 FilePath FilePath::FromWStringHack(const std::wstring& wstring) {
   return FilePath(base::SysWideToNativeMB(wstring));
 }
+
+// static
+FilePath FilePath::FromUTF8Unsafe(const std::string& utf8) {
+#if defined(OS_MACOSX) || defined(OS_CHROMEOS)
+  return FilePath(utf8);  // Mac OS X and Chrome OS paths are UTF-8; use as-is.
+#else  // Elsewhere: UTF-8 -> wide -> native multibyte string.
+  return FilePath(base::SysWideToNativeMB(UTF8ToWide(utf8)));
+#endif
+}
+
 #elif defined(OS_WIN)
 string16 FilePath::LossyDisplayName() const {
   return path_;
@@ -545,10 +563,19 @@
   return "";
 }
 
+std::string FilePath::AsUTF8Unsafe() const {
+  return WideToUTF8(value());  // Wide path string -> UTF-8.
+}
+
 // static
 FilePath FilePath::FromWStringHack(const std::wstring& wstring) {
   return FilePath(wstring);
 }
+
+// static
+FilePath FilePath::FromUTF8Unsafe(const std::string& utf8) {
+  return FilePath(UTF8ToWide(utf8));  // UTF-8 -> wide path string.
+}
 #endif
 
 void FilePath::WriteToPickle(Pickle* pickle) {
diff --git a/base/file_path.h b/base/file_path.h
index 6396833..8342c30 100644
--- a/base/file_path.h
+++ b/base/file_path.h
@@ -17,6 +17,7 @@
 //
 // * The encoding need not be specified on POSIX systems, although some
 //   POSIX-compliant systems do specify an encoding.  Mac OS X uses UTF-8.
+//   Chrome OS also uses UTF-8.
 //   Linux does not specify an encoding, but in practice, the locale's
 //   character set may be used.
 //
@@ -294,6 +295,21 @@
   // known-ASCII filename.
   std::string MaybeAsASCII() const;
 
+  // Return the path as UTF-8.
+  //
+  // This function is *unsafe* as there is no way to tell what encoding is
+  // used in file names on POSIX systems other than Mac and Chrome OS,
+  // although UTF-8 is practically used everywhere these days. To mitigate
+  // the encoding issue, this function internally calls
+  // SysNativeMBToWide() on POSIX systems other than Mac and Chrome OS,
+  // on the assumption that the current locale's encoding is used in file
+  // names, but this isn't a perfect solution.
+  //
+  // Once it becomes safe to stop caring about non-UTF-8 file names,
+  // the SysNativeMBToWide() hack will be removed from the code, along
+  // with "Unsafe" in the function name.
+  std::string AsUTF8Unsafe() const;
+
   // Older Chromium code assumes that paths are always wstrings.
   // This function converts wstrings to FilePaths, and is
   // useful to smooth porting that old code to the FilePath API.
@@ -312,6 +328,16 @@
   //   ever use the result of that again as a path.
   static FilePath FromWStringHack(const std::wstring& wstring);
 
+  // Returns a FilePath object from a path name in UTF-8. This function
+  // should only be used for cases where you are sure that the input
+  // string is UTF-8.
+  //
+  // Like AsUTF8Unsafe(), this function is unsafe. This function
+  // internally calls SysWideToNativeMB() on POSIX systems other than Mac
+  // and Chrome OS, to mitigate the encoding issue. See the comment at
+  // AsUTF8Unsafe() for details.
+  static FilePath FromUTF8Unsafe(const std::string& utf8);
+
   void WriteToPickle(Pickle* pickle);
   bool ReadFromPickle(Pickle* pickle, void** iter);
 
diff --git a/base/file_path_unittest.cc b/base/file_path_unittest.cc
index 6eb106b..9890e80 100644
--- a/base/file_path_unittest.cc
+++ b/base/file_path_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -37,6 +37,11 @@
   int expected;
 };
 
+struct UTF8TestData {
+  const FilePath::CharType* native;  // Path in the platform's native form.
+  const char* utf8;                  // The same path encoded as UTF-8.
+};
+
 // file_util winds up using autoreleased objects on the Mac, so this needs
 // to be a PlatformTest
 class FilePathTest : public PlatformTest {
@@ -1044,6 +1049,30 @@
   }
 }
 
+TEST_F(FilePathTest, FromUTF8Unsafe_And_AsUTF8Unsafe) {
+  const struct UTF8TestData cases[] = {
+    { FPL("foo.txt"), "foo.txt" },  // Plain ASCII.
+    // "aeo" with accents. Use http://0xcc.net/jsescape/ to decode them.
+    { FPL("\u00E0\u00E8\u00F2.txt"), "\xC3\xA0\xC3\xA8\xC3\xB2.txt" },
+    // Full-width "ABC".
+    { FPL("\uFF21\uFF22\uFF23.txt"),
+      "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3.txt" },
+  };
+
+  for (size_t i = 0; i < arraysize(cases); ++i) {
+    // UTF-8 -> FilePath must yield the expected native value.
+    FilePath from_utf8 = FilePath::FromUTF8Unsafe(cases[i].utf8);
+    EXPECT_EQ(cases[i].native, from_utf8.value())
+        << "i: " << i << ", input: " << cases[i].native;
+    // Native FilePath -> UTF-8 must yield the expected UTF-8 bytes.
+    FilePath from_native = FilePath(cases[i].native);
+    EXPECT_EQ(cases[i].utf8, from_native.AsUTF8Unsafe())
+        << "i: " << i << ", input: " << cases[i].native;
+    // Both construction routes must produce the same path value.
+    EXPECT_EQ(from_utf8.value(), from_native.value());
+  }
+}
+
 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
 TEST_F(FilePathTest, NormalizeWindowsPathSeparators) {
   const struct UnaryTestData cases[] = {
@@ -1086,4 +1115,5 @@
               "i: " << i << ", input: " << input.value();
   }
 }
+
 #endif
diff --git a/base/value_conversions.cc b/base/value_conversions.cc
index 64513d0..f4957a4 100644
--- a/base/value_conversions.cc
+++ b/base/value_conversions.cc
@@ -5,39 +5,14 @@
 #include "base/value_conversions.h"
 
 #include "base/file_path.h"
-#include "base/sys_string_conversions.h"
-#include "base/utf_string_conversions.h"
 #include "base/values.h"
 
 namespace base {
 
-namespace {
-
 // |Value| internally stores strings in UTF-8, so we have to convert from the
 // system native code to UTF-8 and back.
-
-std::string FilePathToUTF8(const FilePath& file_path) {
-#if defined(OS_POSIX)
-  return WideToUTF8(SysNativeMBToWide(file_path.value()));
-#else
-  return UTF16ToUTF8(file_path.value());
-#endif
-}
-
-FilePath UTF8ToFilePath(const std::string& str) {
-  FilePath::StringType result;
-#if defined(OS_POSIX)
-  result = SysWideToNativeMB(UTF8ToWide(str));
-#elif defined(OS_WIN)
-  result = UTF8ToUTF16(str);
-#endif
-  return FilePath(result);
-}
-
-}  // namespace
-
 StringValue* CreateFilePathValue(const FilePath& in_value) {
-  return new StringValue(FilePathToUTF8(in_value));
+  return new StringValue(in_value.AsUTF8Unsafe());
 }
 
 bool GetValueAsFilePath(const Value& value, FilePath* file_path) {
@@ -45,7 +20,7 @@
   if (!value.GetAsString(&str))
     return false;
   if (file_path)
-    *file_path = UTF8ToFilePath(str);
+    *file_path = FilePath::FromUTF8Unsafe(str);
   return true;
 }