COMPRESS-331 make tar checksum check as strict as GNU tar
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 518381e..6bc1df2 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -44,6 +44,15 @@
<body>
<release version="1.11" date="not released, yet"
description="Release 1.11">
+ <action issue="COMPRESS-331" type="fix" date="2016-01-31">
+ The checksum validation of TararchiveEntry is now as strict as
+ the validation of GNU tar, which eliminates a few cases of
+ false positives of ArchiveStreamFactory.
+ This behavior is a breaking change since the check has become
+ more strict but any archive that fails the checksum test now
+ would also fail it when extracted with other tools and must be
+ considered an invalid archive.
+ </action>
<action issue="COMPRESS-323" type="add" date="2016-01-29">
ZipFile.getRawInputStream() is now part of the public API
</action>
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 3cbf83f..65088eb 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -580,16 +580,6 @@
* checksum.
* </blockquote>
* <p>
- * In addition there are
- * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
- * that seem to have parts of their header cleared to zero (no detectable
- * magic bytes, etc.) but still have a reasonable-looking checksum field
- * present. It looks like we can detect such cases reasonably well by
- * checking whether the stored checksum is <em>greater than</em> the
- * computed unsigned checksum. That check is unlikely to pass on some
- * random file header, as it would need to have a valid sequence of
- * octal digits in just the right place.
- * <p>
* The return value of this method should be treated as a best-effort
* heuristic rather than an absolute and final truth. The checksum
* verification logic may well evolve over time as more special cases
@@ -619,9 +609,7 @@
unsignedSum += 0xff & b;
signedSum += b;
}
-
- return storedSum == unsignedSum || storedSum == signedSum
- || storedSum > unsignedSum; // COMPRESS-177
+ return storedSum == unsignedSum || storedSum == signedSum;
}
}
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
index 5ebbbb5..9b83116 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
@@ -286,32 +286,44 @@
assertTrue(TarUtils.verifyCheckSum(valid));
byte[] compress117 = { // from COMPRESS-117
- 116, 101, 115, 116, 49, 46, 120, 109, 108, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 48, 48, 48, 48, 54, 52, 52, 0, 48, 48, 48, 48, 55, 54, 53,
- 0, 48, 48, 48, 48, 55, 54, 53, 0, 48, 48, 48, 48, 48, 48, 48,
- 49, 49, 52, 50, 0, 49, 48, 55, 49, 54, 53, 52, 53, 54, 50, 54,
- 0, 48, 49, 50, 50, 54, 48, 0, 32, 48, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ (byte) 0x37, (byte) 0x7a, (byte) 0x43, (byte) 0x2e, (byte) 0x74, (byte) 0x78, (byte) 0x74, (byte) 0x00,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, (byte) 0x31, (byte) 0x30, (byte) 0x30, (byte) 0x37,
+ (byte) 0x37, (byte) 0x37, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x30, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x30, (byte) 0x20, (byte) 0x00, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x31, (byte) 0x33, (byte) 0x30, (byte) 0x33, (byte) 0x33, (byte) 0x20,
+ (byte) 0x31, (byte) 0x31, (byte) 0x31, (byte) 0x31, (byte) 0x35, (byte) 0x31, (byte) 0x36, (byte) 0x36,
+ (byte) 0x30, (byte) 0x31, (byte) 0x36, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x35, (byte) 0x34,
+ (byte) 0x31, (byte) 0x37, (byte) 0x20, (byte) 0x00, (byte) 0x30, (byte) 0x00, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
assertTrue(TarUtils.verifyCheckSum(compress117));
byte[] invalid = { // from the testAIFF.aif file in Tika
diff --git a/src/test/resources/COMPRESS-117.tar b/src/test/resources/COMPRESS-117.tar
index d42ebeb..51abeb9 100644
--- a/src/test/resources/COMPRESS-117.tar
+++ b/src/test/resources/COMPRESS-117.tar
Binary files differ