Add FilePath::FinalExtension() to avoid double extensions (.tar.gz) for file selector

Windows and OS X file selectors break badly on double-extensions. GTK and
Chrome OS handle it slightly better, but CrOS has a slight bug and GTK's
handling of file extensions is minimal, so not a whole lot changed. See
https://codereview.chromium.org/4883003/#msg14 for full analysis of
platform behavior.

There is some logic that does benefit from long extensions (renaming "foo.tar.gz"
to "foo (1).tar.gz" instead of "foo.tar (1).gz"), and some other callers store
state based on extension, so rather than changing FilePath::Extension, add a
new FilePath::FinalExtension and change SelectFileDialog callers to use it.

Also work around a problem in NSSavePanel when saving "foo.tar.gz" with
extensions hidden.

TEST=FilePath.Extension,
     FilePath.Extension2
     FilePath.RemoveExtension
     Enabling "Ask where to save each file before downloading"; saving a tar.gz doesn't result in weird confirmation prompt on OS X, with or without "Show all filename extensions" enabled in Finder.
BUG=83084

Review URL: https://codereview.chromium.org/4883003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@239505 0039d316-1c4b-4281-b951-d872f2087c98


CrOS-Libchrome-Original-Commit: 18cbba0aedd9d569b27be8adbd2836c839ae9897
diff --git a/base/files/file_path.cc b/base/files/file_path.cc
index 4cfa2e6..3ea5856 100644
--- a/base/files/file_path.cc
+++ b/base/files/file_path.cc
@@ -107,18 +107,21 @@
 
 // Find the position of the '.' that separates the extension from the rest
 // of the file name. The position is relative to BaseName(), not value().
-// This allows a second extension component of up to 4 characters when the
-// rightmost extension component is a common double extension (gz, bz2, Z).
-// For example, foo.tar.gz or foo.tar.Z would have extension components of
-// '.tar.gz' and '.tar.Z' respectively. Returns npos if it can't find an
-// extension.
-StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
+// Returns npos if it can't find an extension.
+StringType::size_type FinalExtensionSeparatorPosition(const StringType& path) {
   // Special case "." and ".."
   if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory)
     return StringType::npos;
 
-  const StringType::size_type last_dot =
-      path.rfind(FilePath::kExtensionSeparator);
+  return path.rfind(FilePath::kExtensionSeparator);
+}
+
+// Same as above, but allow a second extension component of up to 4
+// characters when the rightmost extension component is a common double
+// extension (gz, bz2, Z).  For example, foo.tar.gz or foo.tar.Z would have
+// extension components of '.tar.gz' and '.tar.Z' respectively.
+StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
+  const StringType::size_type last_dot = FinalExtensionSeparatorPosition(path);
 
   // No extension, or the extension is the whole filename.
   if (last_dot == StringType::npos || last_dot == 0U)
@@ -370,6 +373,15 @@
   return base.path_.substr(dot, StringType::npos);
 }
 
+StringType FilePath::FinalExtension() const {
+  FilePath base(BaseName());
+  const StringType::size_type dot = FinalExtensionSeparatorPosition(base.path_);
+  if (dot == StringType::npos)
+    return StringType();
+
+  return base.path_.substr(dot, StringType::npos);
+}
+
 FilePath FilePath::RemoveExtension() const {
   if (Extension().empty())
     return *this;
@@ -381,6 +393,17 @@
   return FilePath(path_.substr(0, dot));
 }
 
+FilePath FilePath::RemoveFinalExtension() const {
+  if (FinalExtension().empty())
+    return *this;
+
+  const StringType::size_type dot = FinalExtensionSeparatorPosition(path_);
+  if (dot == StringType::npos)
+    return *this;
+
+  return FilePath(path_.substr(0, dot));
+}
+
 FilePath FilePath::InsertBeforeExtension(const StringType& suffix) const {
   if (suffix.empty())
     return FilePath(path_);
diff --git a/base/files/file_path.h b/base/files/file_path.h
index 33beb0b..f4b8ff8 100644
--- a/base/files/file_path.h
+++ b/base/files/file_path.h
@@ -224,18 +224,33 @@
   // Returns ".jpg" for path "C:\pics\jojo.jpg", or an empty string if
   // the file has no extension.  If non-empty, Extension() will always start
   // with precisely one ".".  The following code should always work regardless
-  // of the value of path.
+  // of the value of path.  For common double-extensions like .tar.gz and
+  // .user.js, this method returns the combined extension.  For a single
+  // component, use FinalExtension().
   // new_path = path.RemoveExtension().value().append(path.Extension());
   // ASSERT(new_path == path.value());
   // NOTE: this is different from the original file_util implementation which
   // returned the extension without a leading "." ("jpg" instead of ".jpg")
   StringType Extension() const;
 
+  // Returns the path's file extension, as in Extension(), but will
+  // never return a double extension.
+  //
+  // TODO(davidben): Check all our extension-sensitive code to see if
+  // we can rename this to Extension() and the other to something like
+  // LongExtension(), defaulting to short extensions and leaving the
+  // long "extensions" to logic like file_util::GetUniquePathNumber().
+  StringType FinalExtension() const;
+
   // Returns "C:\pics\jojo" for path "C:\pics\jojo.jpg"
   // NOTE: this is slightly different from the similar file_util implementation
   // which returned simply 'jojo'.
   FilePath RemoveExtension() const WARN_UNUSED_RESULT;
 
+  // Removes the path's file extension, as in RemoveExtension(), but
+  // ignores double extensions.
+  FilePath RemoveFinalExtension() const WARN_UNUSED_RESULT;
+
   // Inserts |suffix| after the file name portion of |path| but before the
   // extension.  Returns "" if BaseName() == "." or "..".
   // Examples:
diff --git a/base/files/file_path_unittest.cc b/base/files/file_path_unittest.cc
index 1b6e465..fa7627c 100644
--- a/base/files/file_path_unittest.cc
+++ b/base/files/file_path_unittest.cc
@@ -708,6 +708,7 @@
 
   FilePath jpg = base_dir.Append(FILE_PATH_LITERAL("foo.jpg"));
   EXPECT_EQ(FILE_PATH_LITERAL(".jpg"), jpg.Extension());
+  EXPECT_EQ(FILE_PATH_LITERAL(".jpg"), jpg.FinalExtension());
 
   FilePath base = jpg.BaseName().RemoveExtension();
   EXPECT_EQ(FILE_PATH_LITERAL("foo"), base.value());
@@ -717,6 +718,7 @@
 
   EXPECT_EQ(path_no_ext.value(), path_no_ext.RemoveExtension().value());
   EXPECT_EQ(FILE_PATH_LITERAL(""), path_no_ext.Extension());
+  EXPECT_EQ(FILE_PATH_LITERAL(""), path_no_ext.FinalExtension());
 }
 
 TEST_F(FilePathTest, Extension2) {
@@ -739,16 +741,9 @@
     { FPL("/foo/bar/"),              FPL("") },
     { FPL("/foo/bar./"),             FPL(".") },
     { FPL("/foo/bar/baz.ext1.ext2"), FPL(".ext2") },
-    { FPL("/foo.tar.gz"),            FPL(".tar.gz") },
-    { FPL("/foo.tar.Z"),             FPL(".tar.Z") },
-    { FPL("/foo.tar.bz2"),           FPL(".tar.bz2") },
     { FPL("/subversion-1.6.12.zip"), FPL(".zip") },
-    { FPL("/foo.1234.gz"),           FPL(".1234.gz") },
     { FPL("/foo.12345.gz"),          FPL(".gz") },
     { FPL("/foo..gz"),               FPL(".gz") },
-    { FPL("/foo.1234.tar.gz"),       FPL(".tar.gz") },
-    { FPL("/foo.tar.tar.gz"),        FPL(".tar.gz") },
-    { FPL("/foo.tar.gz.gz"),         FPL(".gz.gz") },
     { FPL("."),                      FPL("") },
     { FPL(".."),                     FPL("") },
     { FPL("./foo"),                  FPL("") },
@@ -757,14 +752,32 @@
     { FPL("/foo.bar////"),           FPL(".bar") },
     { FPL("/foo.bar/.."),            FPL("") },
     { FPL("/foo.bar/..////"),        FPL("") },
-    { FPL("/foo.1234.user.js"),      FPL(".user.js") },
-    { FPL("foo.user.js"),            FPL(".user.js") },
     { FPL("/foo.1234.luser.js"),     FPL(".js") },
     { FPL("/user.js"),               FPL(".js") },
   };
+  const struct UnaryTestData double_extension_cases[] = {
+    { FPL("/foo.tar.gz"),            FPL(".tar.gz") },
+    { FPL("/foo.tar.Z"),             FPL(".tar.Z") },
+    { FPL("/foo.tar.bz2"),           FPL(".tar.bz2") },
+    { FPL("/foo.1234.gz"),           FPL(".1234.gz") },
+    { FPL("/foo.1234.tar.gz"),       FPL(".tar.gz") },
+    { FPL("/foo.tar.tar.gz"),        FPL(".tar.gz") },
+    { FPL("/foo.tar.gz.gz"),         FPL(".gz.gz") },
+    { FPL("/foo.1234.user.js"),      FPL(".user.js") },
+    { FPL("foo.user.js"),            FPL(".user.js") },
+  };
   for (unsigned int i = 0; i < arraysize(cases); ++i) {
     FilePath path(cases[i].input);
     FilePath::StringType extension = path.Extension();
+    FilePath::StringType final_extension = path.FinalExtension();
+    EXPECT_STREQ(cases[i].expected, extension.c_str()) << "i: " << i <<
+        ", path: " << path.value();
+    EXPECT_STREQ(cases[i].expected, final_extension.c_str()) << "i: " << i <<
+        ", path: " << path.value();
+  }
+  for (unsigned int i = 0; i < arraysize(double_extension_cases); ++i) {
+    FilePath path(cases[i].input);
+    FilePath::StringType extension = path.Extension();
     EXPECT_STREQ(cases[i].expected, extension.c_str()) << "i: " << i <<
         ", path: " << path.value();
   }
@@ -850,7 +863,6 @@
     { FPL("foo."),                FPL("foo") },
     { FPL("foo.."),               FPL("foo.") },
     { FPL("foo.baz.dll"),         FPL("foo.baz") },
-    { FPL("foo.tar.gz"),          FPL("foo") },
 #if defined(FILE_PATH_USES_WIN_SEPARATORS)
     { FPL("C:\\foo.bar\\foo"),    FPL("C:\\foo.bar\\foo") },
     { FPL("C:\\foo.bar\\..\\\\"), FPL("C:\\foo.bar\\..\\\\") },
@@ -861,8 +873,19 @@
   for (unsigned int i = 0; i < arraysize(cases); ++i) {
     FilePath path(cases[i].input);
     FilePath removed = path.RemoveExtension();
+    FilePath removed_final = path.RemoveFinalExtension();
     EXPECT_EQ(cases[i].expected, removed.value()) << "i: " << i <<
         ", path: " << path.value();
+    EXPECT_EQ(cases[i].expected, removed_final.value()) << "i: " << i <<
+        ", path: " << path.value();
+  }
+  {
+    FilePath path(FPL("foo.tar.gz"));
+    FilePath removed = path.RemoveExtension();
+    FilePath removed_final = path.RemoveFinalExtension();
+    EXPECT_EQ(FPL("foo"), removed.value()) << ", path: " << path.value();
+    EXPECT_EQ(FPL("foo.tar"), removed_final.value()) << ", path: "
+                                                     << path.value();
   }
 }