COMPRESS-191 add checksum check to TarArchiveEntry. Submitted by Jukka Zitting.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1358504 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
index ccc3ee1..2108aec 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
@@ -132,6 +132,9 @@
/** The entry's modification time. */
private long modTime;
+ /** If the header checksum is reasonably correct. */
+ private boolean checkSumOK;
+
/** The entry's link flag. */
private byte linkFlag;
@@ -551,6 +554,17 @@
}
/**
+ * Get this entry's checksum status.
+ *
+ * @return if the header checksum is reasonably correct
+ * @see TarUtils#verifyCheckSum(byte[])
+ * @since 1.5
+ */
+ public boolean isCheckSumOK() {
+ return checkSumOK;
+ }
+
+ /**
* Get this entry's file.
*
* @return This entry's file.
@@ -942,6 +956,7 @@
offset += SIZELEN;
modTime = TarUtils.parseOctalOrBinary(header, offset, MODTIMELEN);
offset += MODTIMELEN;
+ checkSumOK = TarUtils.verifyCheckSum(header);
offset += CHKSUMLEN;
linkFlag = header[offset++];
linkName = oldStyle ? TarUtils.parseName(header, offset, NAMELEN)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
index 9221f47..3872946 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java
@@ -69,6 +69,11 @@
int CHKSUMLEN = 8;
/**
+ * Offset of the checksum field within header record.
+ */
+ int CHKSUM_OFFSET = 148;
+
+ /**
* The length of the size field in a header buffer.
* Includes the trailing space or NUL.
*/
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index deb29f0..4940d1d 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -18,6 +18,9 @@
*/
package org.apache.commons.compress.archivers.tar;
+import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
+import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
+
import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
@@ -558,4 +561,63 @@
return sum;
}
+ /**
+ * Wikipedia <a href="http://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
+ * <blockquote>
+ * The checksum is calculated by taking the sum of the unsigned byte values
+ * of the header block with the eight checksum bytes taken to be ascii
+ * spaces (decimal value 32). It is stored as a six digit octal number with
+ * leading zeroes followed by a NUL and then a space. Various
+ * implementations do not adhere to this format. For better compatibility,
+ * ignore leading and trailing whitespace, and get the first six digits. In
+ * addition, some historic tar implementations treated bytes as signed.
+ * Implementations typically calculate the checksum both ways, and treat it
+ * as good if either the signed or unsigned sum matches the included
+ * checksum.
+ * </blockquote>
+ * <p>
+ * In addition there are
+ * <a href="https://issues.apache.org/jira/browse/COMPRESS-117">some tar files</a>
+ * that seem to have parts of their header cleared to zero (no detectable
+ * magic bytes, etc.) but still have a reasonable-looking checksum field
+ * present. It looks like we can detect such cases reasonably well by
+ * checking whether the stored checksum is <em>greater than</em> the
+ * computed unsigned checksum. That check is unlikely to pass on some
+ * random file header, as it would need to have a valid sequence of
+ * octal digits in just the right place.
+ * <p>
+ * The return value of this method should be treated as a best-effort
+ * heuristic rather than an absolute and final truth. The checksum
+ * verification logic may well evolve over time as more special cases
+ * are encountered.
+ *
+ * @param header tar header
+ * @return whether the checksum is reasonably good
+ * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
+ * @since 1.5
+ */
+ public static boolean verifyCheckSum(byte[] header) {
+ long storedSum = 0;
+ long unsignedSum = 0;
+ long signedSum = 0;
+
+ int digits = 0;
+ for (int i = 0; i < header.length; i++) {
+ byte b = header[i];
+ if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
+ if ('0' <= b && b <= '7' && digits++ < 6) {
+ storedSum = storedSum * 8 + b - '0';
+ } else if (digits > 0) {
+ digits = 6; // only look at the first octal digit sequence
+ }
+ b = ' ';
+ }
+ unsignedSum += 0xff & b;
+ signedSum += b;
+ }
+
+ return storedSum == unsignedSum || storedSum == signedSum
+ || storedSum > unsignedSum; // COMPRESS-177
+ }
+
}
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
index 1cf2ccc..2f8d6f4 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
@@ -80,15 +80,19 @@
t = tin.getNextTarEntry();
assertNotNull(t);
assertEquals("/", t.getName());
+ assertTrue(t.isCheckSumOK());
t = tin.getNextTarEntry();
assertNotNull(t);
assertEquals("foo.txt", t.getName());
+ assertTrue(t.isCheckSumOK());
t = tin.getNextTarEntry();
assertNotNull(t);
assertEquals("bar.txt", t.getName());
+ assertTrue(t.isCheckSumOK());
t = tin.getNextTarEntry();
assertNotNull(t);
assertEquals("baz.txt", t.getName());
+ assertTrue(t.isCheckSumOK());
} finally {
if (tin != null) {
tin.close();
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
index 5e4f258..d687ce4 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
@@ -76,6 +76,7 @@
assertEquals("sample/link-to-txt-file.lnk", tae.getName());
assertEquals(new Date(0), tae.getLastModifiedDate());
assertTrue(tae.isSymbolicLink());
+ assertTrue(tae.isCheckSumOK());
} finally {
if (in != null) {
in.close();
@@ -105,6 +106,7 @@
cal.set(1969, 11, 31, 23, 59, 59);
cal.set(Calendar.MILLISECOND, 0);
assertEquals(cal.getTime(), tae.getLastModifiedDate());
+ assertTrue(tae.isCheckSumOK());
} finally {
if (in != null) {
in.close();
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
index 3bc3b60..f45491a 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
@@ -239,4 +239,97 @@
assertEquals(-3601l, TarUtils.parseOctalOrBinary(b, 0, 8));
}
+ // https://issues.apache.org/jira/browse/COMPRESS-191
+ public void testVerifyHeaderCheckSum() {
+ byte[] valid = { // from bla.tar
+ 116, 101, 115, 116, 49, 46, 120, 109, 108, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 48, 48, 48, 48, 54, 52, 52, 0, 48, 48, 48, 48, 55, 54, 53,
+ 0, 48, 48, 48, 48, 55, 54, 53, 0, 48, 48, 48, 48, 48, 48, 48,
+ 49, 49, 52, 50, 0, 49, 48, 55, 49, 54, 53, 52, 53, 54, 50, 54,
+ 0, 48, 49, 50, 50, 54, 48, 0, 32, 48, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 117, 115, 116, 97, 114, 32, 32, 0,
+ 116, 99, 117, 114, 100, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 99, 117,
+ 114, 100, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0 };
+ assertTrue(TarUtils.verifyCheckSum(valid));
+
+ byte[] compress117 = { // from COMPRESS-117
+ 116, 101, 115, 116, 49, 46, 120, 109, 108, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 48, 48, 48, 48, 54, 52, 52, 0, 48, 48, 48, 48, 55, 54, 53,
+ 0, 48, 48, 48, 48, 55, 54, 53, 0, 48, 48, 48, 48, 48, 48, 48,
+ 49, 49, 52, 50, 0, 49, 48, 55, 49, 54, 53, 52, 53, 54, 50, 54,
+ 0, 48, 49, 50, 50, 54, 48, 0, 32, 48, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ assertTrue(TarUtils.verifyCheckSum(compress117));
+
+ byte[] invalid = { // from the testAIFF.aif file in Tika
+ 70, 79, 82, 77, 0, 0, 15, 46, 65, 73, 70, 70, 67, 79, 77, 77,
+ 0, 0, 0, 18, 0, 2, 0, 0, 3, -64, 0, 16, 64, 14, -84, 68, 0, 0,
+ 0, 0, 0, 0, 83, 83, 78, 68, 0, 0, 15, 8, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, -1, -1, 0, 1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 1, -1, -1,
+ 0, 0, 0, 0, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 1, -1, -1, 0, 1, -1, -2, 0, 1, -1, -1, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0,
+ 2, -1, -2, 0, 2, -1, -1, 0, 0, 0, 1, -1, -1, 0, 1, -1, -1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0,
+ 0, 1, -1, -1, 0, 0, 0, 1, -1, -1, 0, 0, 0, 1, -1, -2, 0, 2,
+ -1, -1, 0, 0, 0, 0, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 1, -1, -1, 0, 2, -1, -2,
+ 0, 2, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, -1,
+ -2, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -1, -1, 0, 1, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -1, 0, 0,
+ 0, 0, 0, 0, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 2, -1, -2,
+ 0, 1, 0, 0, -1, -1, 0, 2, -1, -2, 0, 2, -1, -2, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, 0,
+ 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 2, -1, -1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, -2, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 2, -1, -2,
+ 0, 2, -1, -2, 0, 2, -1, -1, 0, 0, 0, 0, -1, -1, 0, 1, -1, -1,
+ 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, 0, 0, 0, 0,
+ -1, -1, 0, 2, -1, -2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1 };
+ assertFalse(TarUtils.verifyCheckSum(invalid));
+ }
+
}