Treat multiple extensions like .tar.gz as a single extension.

The logic is taken from Firefox.

BUG=48346
TEST=unit tests; downloading the same .tar.gz file multiple times (see bug)

Review URL: http://codereview.chromium.org/3018011

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@53844 0039d316-1c4b-4281-b951-d872f2087c98


CrOS-Libchrome-Original-Commit: 3432291b5b66e9bea09537c3bfbc4d80f27442a4
diff --git a/base/file_path.cc b/base/file_path.cc
index 1787a69..dd80eab 100644
--- a/base/file_path.cc
+++ b/base/file_path.cc
@@ -36,16 +36,18 @@
 
 const FilePath::CharType FilePath::kExtensionSeparator = FILE_PATH_LITERAL('.');
 
+typedef FilePath::StringType StringType;
 
 namespace {
 
+const char* kCommonDoubleExtensions[] = { "gz", "z", "bz2" };
+
 // If this FilePath contains a drive letter specification, returns the
 // position of the last character of the drive letter specification,
 // otherwise returns npos.  This can only be true on Windows, when a pathname
 // begins with a letter followed by a colon.  On other platforms, this always
 // returns npos.
-FilePath::StringType::size_type FindDriveLetter(
-    const FilePath::StringType& path) {
+StringType::size_type FindDriveLetter(const StringType& path) {
 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
   // This is dependent on an ASCII-based character set, but that's a
   // reasonable assumption.  iswalpha can be too inclusive here.
@@ -55,35 +57,33 @@
     return 1;
   }
 #endif  // FILE_PATH_USES_DRIVE_LETTERS
-  return FilePath::StringType::npos;
+  return StringType::npos;
 }
 
-
 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
-bool EqualDriveLetterCaseInsensitive(const FilePath::StringType a,
-                                     const FilePath::StringType b) {
+bool EqualDriveLetterCaseInsensitive(const StringType a,
+                                     const StringType b) {
   size_t a_letter_pos = FindDriveLetter(a);
   size_t b_letter_pos = FindDriveLetter(b);
 
-  if ((a_letter_pos == FilePath::StringType::npos) ||
-      (b_letter_pos == FilePath::StringType::npos))
+  if (a_letter_pos == StringType::npos || b_letter_pos == StringType::npos)
     return a == b;
 
-  FilePath::StringType a_letter(a.substr(0, a_letter_pos + 1));
-  FilePath::StringType b_letter(b.substr(0, b_letter_pos + 1));
+  StringType a_letter(a.substr(0, a_letter_pos + 1));
+  StringType b_letter(b.substr(0, b_letter_pos + 1));
   if (!StartsWith(a_letter, b_letter, false))
     return false;
 
-  FilePath::StringType a_rest(a.substr(a_letter_pos + 1));
-  FilePath::StringType b_rest(b.substr(b_letter_pos + 1));
+  StringType a_rest(a.substr(a_letter_pos + 1));
+  StringType b_rest(b.substr(b_letter_pos + 1));
   return a_rest == b_rest;
 }
 #endif  // defined(FILE_PATH_USES_DRIVE_LETTERS)
 
-bool IsPathAbsolute(const FilePath::StringType& path) {
+bool IsPathAbsolute(const StringType& path) {
 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
-  FilePath::StringType::size_type letter = FindDriveLetter(path);
-  if (letter != FilePath::StringType::npos) {
+  StringType::size_type letter = FindDriveLetter(path);
+  if (letter != StringType::npos) {
     // Look for a separator right after the drive specification.
     return path.length() > letter + 1 &&
         FilePath::IsSeparator(path[letter + 1]);
@@ -97,8 +97,8 @@
 #endif  // FILE_PATH_USES_DRIVE_LETTERS
 }
 
-bool AreAllSeparators(const FilePath::StringType& input) {
-  for (FilePath::StringType::const_iterator it = input.begin();
+bool AreAllSeparators(const StringType& input) {
+  for (StringType::const_iterator it = input.begin();
       it != input.end(); ++it) {
     if (!FilePath::IsSeparator(*it))
       return false;
@@ -107,6 +107,54 @@
   return true;
 }
 
+// Find the position of the '.' that separates the extension from the rest
+// of the file name. The position is relative to BaseName(), not value().
+// This allows a second extension component of up to 4 characters when the
+// rightmost extension component is a common double extension (gz, bz2, Z).
+// For example, foo.tar.gz or foo.tar.Z would have extension components of
+// '.tar.gz' and '.tar.Z' respectively. Returns npos if it can't find an
+// extension.
+StringType::size_type ExtensionSeparatorPosition(const StringType& path) {
+  // Special case "." and ".."
+  if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory)
+    return StringType::npos;
+
+  const StringType::size_type last_dot =
+      path.rfind(FilePath::kExtensionSeparator);
+
+  // No extension, or the extension is the whole filename.
+  if (last_dot == StringType::npos || last_dot == 0U)
+    return last_dot;
+
+  // Special case .<extension1>.<extension2>, but only if the final extension
+  // is one of a few common double extensions.
+  StringType extension(path, last_dot + 1);
+  bool is_common_double_extension = false;
+  for (size_t i = 0; i < arraysize(kCommonDoubleExtensions); ++i) {
+    if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i]))
+      is_common_double_extension = true;
+  }
+  if (!is_common_double_extension)
+    return last_dot;
+
+  // Check that <extension1> is 1-4 characters, otherwise fall back to
+  // <extension2>.
+  const StringType::size_type penultimate_dot =
+      path.rfind(FilePath::kExtensionSeparator, last_dot - 1);
+  const StringType::size_type last_separator =
+      path.find_last_of(FilePath::kSeparators, last_dot - 1,
+                        arraysize(FilePath::kSeparators) - 1);
+  if (penultimate_dot != StringType::npos &&
+      (last_separator == StringType::npos ||
+      penultimate_dot > last_separator) &&
+      last_dot - penultimate_dot <= 5U &&
+      last_dot - penultimate_dot > 1U) {
+    return penultimate_dot;
+  }
+
+  return last_dot;
+}
+
 }  // namespace
 
 FilePath::FilePath() {
@@ -136,8 +184,7 @@
   return false;
 }
 
-void FilePath::GetComponents(std::vector<FilePath::StringType>* components)
-    const {
+void FilePath::GetComponents(std::vector<StringType>* components) const {
   DCHECK(components);
   if (!components)
     return;
@@ -145,7 +192,7 @@
   if (value().empty())
     return;
 
-  std::vector<FilePath::StringType> ret_val;
+  std::vector<StringType> ret_val;
   FilePath current = *this;
   FilePath base;
 
@@ -165,12 +212,11 @@
   // Capture drive letter, if any.
   FilePath dir = current.DirName();
   StringType::size_type letter = FindDriveLetter(dir.value());
-  if (letter != FilePath::StringType::npos) {
-    ret_val.push_back(FilePath::StringType(dir.value(), 0, letter + 1));
+  if (letter != StringType::npos) {
+    ret_val.push_back(StringType(dir.value(), 0, letter + 1));
   }
 
-  *components = std::vector<FilePath::StringType>(ret_val.rbegin(),
-                                                  ret_val.rend());
+  *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend());
 }
 
 bool FilePath::operator==(const FilePath& that) const {
@@ -195,8 +241,8 @@
 
 bool FilePath::AppendRelativePath(const FilePath& child,
                                   FilePath* path) const {
-  std::vector<FilePath::StringType> parent_components;
-  std::vector<FilePath::StringType> child_components;
+  std::vector<StringType> parent_components;
+  std::vector<StringType> child_components;
   GetComponents(&parent_components);
   child.GetComponents(&child_components);
 
@@ -205,17 +251,17 @@
   if (parent_components.size() == 0)
     return false;
 
-  std::vector<FilePath::StringType>::const_iterator parent_comp =
+  std::vector<StringType>::const_iterator parent_comp =
       parent_components.begin();
-  std::vector<FilePath::StringType>::const_iterator child_comp =
+  std::vector<StringType>::const_iterator child_comp =
       child_components.begin();
 
 #if defined(FILE_PATH_USES_DRIVE_LETTERS)
   // Windows can access case sensitive filesystems, so component
   // comparisions must be case sensitive, but drive letters are
   // never case sensitive.
-  if ((FindDriveLetter(*parent_comp) != FilePath::StringType::npos) &&
-      (FindDriveLetter(*child_comp) != FilePath::StringType::npos)) {
+  if ((FindDriveLetter(*parent_comp) != StringType::npos) &&
+      (FindDriveLetter(*child_comp) != StringType::npos)) {
     if (!StartsWith(*parent_comp, *child_comp, false))
       return false;
     ++parent_comp;
@@ -301,30 +347,24 @@
   return new_path;
 }
 
-FilePath::StringType FilePath::Extension() const {
-  // BaseName() calls StripTrailingSeparators, so cases like /foo.baz/// work.
-  StringType base = BaseName().value();
-
-  // Special case "." and ".."
-  if (base == kCurrentDirectory || base == kParentDirectory)
+StringType FilePath::Extension() const {
+  FilePath base(BaseName());
+  const StringType::size_type dot = ExtensionSeparatorPosition(base.path_);
+  if (dot == StringType::npos)
     return StringType();
 
-  const StringType::size_type last_dot = base.rfind(kExtensionSeparator);
-  if (last_dot == StringType::npos)
-    return StringType();
-  return StringType(base, last_dot);
+  return base.path_.substr(dot, StringType::npos);
 }
 
 FilePath FilePath::RemoveExtension() const {
-  StringType ext = Extension();
-  // It's important to check Extension() since that verifies that the
-  // kExtensionSeparator actually appeared in the last path component.
-  if (ext.empty())
-    return FilePath(path_);
-  // Since Extension() verified that the extension is in fact in the last path
-  // component, this substr will effectively strip trailing separators.
-  const StringType::size_type last_dot = path_.rfind(kExtensionSeparator);
-  return FilePath(path_.substr(0, last_dot));
+  if (Extension().empty())
+    return *this;
+
+  const StringType::size_type dot = ExtensionSeparatorPosition(path_);
+  if (dot == StringType::npos)
+    return *this;
+
+  return FilePath(path_.substr(0, dot));
 }
 
 FilePath FilePath::InsertBeforeExtension(const StringType& suffix) const {
@@ -390,7 +430,7 @@
 bool FilePath::MatchesExtension(const StringType& extension) const {
   DCHECK(extension.empty() || extension[0] == kExtensionSeparator);
 
-  FilePath::StringType current_extension = Extension();
+  StringType current_extension = Extension();
 
   if (current_extension.length() != extension.length())
     return false;
@@ -950,7 +990,7 @@
   }
 }
 
-FilePath::StringType FilePath::GetHFSDecomposedForm(const StringType& string) {
+StringType FilePath::GetHFSDecomposedForm(const StringType& string) {
   scoped_cftyperef<CFStringRef> cfstring(
       CFStringCreateWithBytesNoCopy(
           NULL,
@@ -1071,7 +1111,7 @@
 
 // static.
 void FilePath::WriteStringTypeToPickle(Pickle* pickle,
-                                       const FilePath::StringType& path) {
+                                       const StringType& path) {
 #if defined(WCHAR_T_IS_UTF16)
   pickle->WriteWString(path);
 #elif defined(WCHAR_T_IS_UTF32)
@@ -1083,7 +1123,7 @@
 
 // static.
 bool FilePath::ReadStringTypeFromPickle(Pickle* pickle, void** iter,
-                                        FilePath::StringType* path) {
+                                        StringType* path) {
 #if defined(WCHAR_T_IS_UTF16)
   if (!pickle->ReadWString(iter, path))
     return false;
@@ -1129,12 +1169,12 @@
 }
 
 bool FilePath::ReferencesParent() const {
-  std::vector<FilePath::StringType> components;
+  std::vector<StringType> components;
   GetComponents(&components);
 
-  std::vector<FilePath::StringType>::const_iterator it = components.begin();
+  std::vector<StringType>::const_iterator it = components.begin();
   for (; it != components.end(); ++it) {
-    const FilePath::StringType& component = *it;
+    const StringType& component = *it;
     if (component == kParentDirectory)
       return true;
   }
diff --git a/base/file_path_unittest.cc b/base/file_path_unittest.cc
index 828a642..48bf23c 100644
--- a/base/file_path_unittest.cc
+++ b/base/file_path_unittest.cc
@@ -698,16 +698,16 @@
   FilePath base_dir(FILE_PATH_LITERAL("base_dir"));
 
   FilePath jpg = base_dir.Append(FILE_PATH_LITERAL("foo.jpg"));
-  EXPECT_EQ(jpg.Extension(), FILE_PATH_LITERAL(".jpg"));
+  EXPECT_EQ(FILE_PATH_LITERAL(".jpg"), jpg.Extension());
 
   FilePath base = jpg.BaseName().RemoveExtension();
-  EXPECT_EQ(base.value(), FILE_PATH_LITERAL("foo"));
+  EXPECT_EQ(FILE_PATH_LITERAL("foo"), base.value());
 
   FilePath path_no_ext = base_dir.Append(base);
-  EXPECT_EQ(jpg.RemoveExtension().value(), path_no_ext.value());
+  EXPECT_EQ(path_no_ext.value(), jpg.RemoveExtension().value());
 
   EXPECT_EQ(path_no_ext.value(), path_no_ext.RemoveExtension().value());
-  EXPECT_EQ(path_no_ext.Extension(), FILE_PATH_LITERAL(""));
+  EXPECT_EQ(FILE_PATH_LITERAL(""), path_no_ext.Extension());
 }
 
 TEST_F(FilePathTest, Extension2) {
@@ -730,6 +730,16 @@
     { FPL("/foo/bar/"),              FPL("") },
     { FPL("/foo/bar./"),             FPL(".") },
     { FPL("/foo/bar/baz.ext1.ext2"), FPL(".ext2") },
+    { FPL("/foo.tar.gz"),            FPL(".tar.gz") },
+    { FPL("/foo.tar.Z"),             FPL(".tar.Z") },
+    { FPL("/foo.tar.bz2"),           FPL(".tar.bz2") },
+    { FPL("/subversion-1.6.12.zip"), FPL(".zip") },
+    { FPL("/foo.1234.gz"),           FPL(".1234.gz") },
+    { FPL("/foo.12345.gz"),          FPL(".gz") },
+    { FPL("/foo..gz"),               FPL(".gz") },
+    { FPL("/foo.1234.tar.gz"),       FPL(".tar.gz") },
+    { FPL("/foo.tar.tar.gz"),        FPL(".tar.gz") },
+    { FPL("/foo.tar.gz.gz"),         FPL(".gz.gz") },
     { FPL("."),                      FPL("") },
     { FPL(".."),                     FPL("") },
     { FPL("./foo"),                  FPL("") },
@@ -815,6 +825,34 @@
   }
 }
 
+TEST_F(FilePathTest, RemoveExtension) {
+  const struct UnaryTestData cases[] = {
+    { FPL(""),                    FPL("") },
+    { FPL("."),                   FPL(".") },
+    { FPL(".."),                  FPL("..") },
+    { FPL("foo.dll"),             FPL("foo") },
+    { FPL("./foo.dll"),           FPL("./foo") },
+    { FPL("foo..dll"),            FPL("foo.") },
+    { FPL("foo"),                 FPL("foo") },
+    { FPL("foo."),                FPL("foo") },
+    { FPL("foo.."),               FPL("foo.") },
+    { FPL("foo.baz.dll"),         FPL("foo.baz") },
+    { FPL("foo.tar.gz"),          FPL("foo") },
+#if defined(FILE_PATH_USES_WIN_SEPARATORS)
+    { FPL("C:\\foo.bar\\foo"),    FPL("C:\\foo.bar\\foo") },
+    { FPL("C:\\foo.bar\\..\\\\"), FPL("C:\\foo.bar\\..\\\\") },
+#endif
+    { FPL("/foo.bar/foo"),        FPL("/foo.bar/foo") },
+    { FPL("/foo.bar/..////"),     FPL("/foo.bar/..////") },
+  };
+  for (unsigned int i = 0; i < arraysize(cases); ++i) {
+    FilePath path(cases[i].input);
+    FilePath removed = path.RemoveExtension();
+    EXPECT_EQ(cases[i].expected, removed.value()) << "i: " << i <<
+        ", path: " << path.value();
+  }
+}
+
 TEST_F(FilePathTest, ReplaceExtension) {
   const struct BinaryTestData cases[] = {
     { { FPL(""),              FPL("") },      FPL("") },
@@ -823,6 +861,7 @@
     { { FPL(".."),            FPL("txt") },   FPL("") },
     { { FPL("."),             FPL("") },      FPL("") },
     { { FPL("foo.dll"),       FPL("txt") },   FPL("foo.txt") },
+    { { FPL("./foo.dll"),     FPL("txt") },   FPL("./foo.txt") },
     { { FPL("foo..dll"),      FPL("txt") },   FPL("foo..txt") },
     { { FPL("foo.dll"),       FPL(".txt") },  FPL("foo.txt") },
     { { FPL("foo"),           FPL("txt") },   FPL("foo.txt") },