Add preliminary read-only support for the 7z archive format,
together with Copy and LZMA2 decompression,
some tests, documentation, and Javadocs.
Give attribution as per LEGAL-72.
Jira issue key: COMPRESS-54
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1480055 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/NOTICE.txt b/NOTICE.txt
index 9b5b734..6f183f8 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -3,3 +3,11 @@
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
+
+====================
+
+The files in the package org.apache.commons.compress.archivers.sevenz
+were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/),
+which has been placed in the public domain:
+
+"LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html)
diff --git a/pom.xml b/pom.xml
index 48815f7..d9a2ed1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
<!-- The description is not indented to make it look better in the release notes -->
<description>
Apache Commons Compress software defines an API for working with compression and archive formats.
-These include: bzip2, gzip, pack200, xz and ar, cpio, jar, tar, zip, dump.
+These include: bzip2, gzip, pack200, xz and ar, cpio, jar, tar, zip, dump, 7z.
</description>
<properties>
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java
new file mode 100644
index 0000000..cd8d895
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.BitSet;
+
+class Archive {
+ /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
+ long packPos;
+ /// Size of each packed stream.
+ long[] packSizes;
+    /// Whether each particular packed stream has a CRC.
+ BitSet packCrcsDefined;
+ /// CRCs for each packed stream, valid only if that packed stream has one.
+ int[] packCrcs;
+ /// Properties of solid compression blocks.
+ Folder[] folders;
+ /// Temporary properties for non-empty files (subsumed into the files array later).
+ SubStreamsInfo subStreamsInfo;
+ /// The files and directories in the archive.
+ SevenZArchiveEntry[] files;
+ /// Mapping between folders, files and streams.
+ StreamMap streamMap;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java
new file mode 100644
index 0000000..96e5bab
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+class BindPair {
+ long inIndex;
+ long outIndex;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java
new file mode 100644
index 0000000..cbd271d
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+class Coder {
+ byte[] decompressionMethodId;
+ long numInStreams;
+ long numOutStreams;
+ byte[] properties = null;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java
new file mode 100644
index 0000000..eff28e0
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+/**
+ * The unit of solid compression.
+ */
+class Folder {
+    /// List of coders used in this folder, e.g. one for compression, one for encryption.
+ Coder[] coders;
+ /// Total number of input streams across all coders.
+ long totalInputStreams;
+ /// Total number of output streams across all coders.
+ long totalOutputStreams;
+ /// Mapping between input and output streams.
+ BindPair[] bindPairs;
+    /// Indices of input streams, one per input stream not listed in bindPairs.
+ long[] packedStreams;
+ /// Unpack sizes, per each output stream.
+ long[] unpackSizes;
+ /// Whether the folder has a CRC.
+ boolean hasCrc;
+ /// The CRC, if present.
+ int crc;
+ /// The number of unpack substreams, one per non-empty file in this folder.
+ int numUnpackSubStreams;
+
+ int findBindPairForInStream(final int index) {
+ for (int i = 0; i < bindPairs.length; i++) {
+ if (bindPairs[i].inIndex == index) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ int findBindPairForOutStream(final int index) {
+ for (int i = 0; i < bindPairs.length; i++) {
+ if (bindPairs[i].outIndex == index) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ long getUnpackSize() {
+ if (totalOutputStreams == 0) {
+ return 0;
+ }
+ for (int i = ((int)totalOutputStreams) - 1; i >= 0; i--) {
+ if (findBindPairForOutStream(i) < 0) {
+ return unpackSizes[i];
+ }
+ }
+ return 0;
+ }
+}
+
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java
new file mode 100644
index 0000000..89a813a
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+final class NID {
+ public static final int kEnd = 0x00;
+ public static final int kHeader = 0x01;
+ public static final int kArchiveProperties = 0x02;
+ public static final int kAdditionalStreamsInfo = 0x03;
+ public static final int kMainStreamsInfo = 0x04;
+ public static final int kFilesInfo = 0x05;
+ public static final int kPackInfo = 0x06;
+ public static final int kUnpackInfo = 0x07;
+ public static final int kSubStreamsInfo = 0x08;
+ public static final int kSize = 0x09;
+ public static final int kCRC = 0x0A;
+ public static final int kFolder = 0x0B;
+ public static final int kCodersUnpackSize = 0x0C;
+ public static final int kNumUnpackStream = 0x0D;
+ public static final int kEmptyStream = 0x0E;
+ public static final int kEmptyFile = 0x0F;
+ public static final int kAnti = 0x10;
+ public static final int kName = 0x11;
+ public static final int kCTime = 0x12;
+ public static final int kATime = 0x13;
+ public static final int kMTime = 0x14;
+ public static final int kWinAttributes = 0x15;
+ public static final int kComment = 0x16;
+ public static final int kEncodedHeader = 0x17;
+ public static final int kStartPos = 0x18;
+ public static final int kDummy = 0x19;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java
new file mode 100644
index 0000000..a13e50e
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import org.apache.commons.compress.archivers.ArchiveEntry;
+
+/**
+ * An entry in a 7z archive.
+ *
+ * @NotThreadSafe
+ */
+public class SevenZArchiveEntry implements ArchiveEntry {
+ private String name;
+ private boolean hasStream;
+ private boolean isDirectory;
+ private boolean isAntiItem;
+ private boolean hasCreationDate;
+ private boolean hasLastModifiedDate;
+ private boolean hasAcessDate;
+ private long creationDate;
+ private long lastModifiedDate;
+ private long accessDate;
+ private boolean hasWindowsAttributes;
+ private int windowsAttributes;
+ private boolean hasCrc;
+ private int crc;
+ private long size;
+
+ public SevenZArchiveEntry() {
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public boolean hasStream() {
+ return hasStream;
+ }
+
+ public void setHasStream(boolean hasStream) {
+ this.hasStream = hasStream;
+ }
+
+ public boolean isDirectory() {
+ return isDirectory;
+ }
+
+ public void setDirectory(boolean isDirectory) {
+ this.isDirectory = isDirectory;
+ }
+
+ /**
+ * Indicates whether this is an "anti-item" used in differential backups,
+ * meaning it should delete the same file from a previous backup.
+ * @return true if it is an anti-item, false otherwise
+ */
+ public boolean isAntiItem() {
+ return isAntiItem;
+ }
+
+ /**
+ * Sets whether this is an "anti-item" used in differential backups,
+ * meaning it should delete the same file from a previous backup.
+     * @param isAntiItem true if it is an anti-item, false otherwise
+ */
+ public void setAntiItem(boolean isAntiItem) {
+ this.isAntiItem = isAntiItem;
+ }
+
+ public boolean getHasCreationDate() {
+ return hasCreationDate;
+ }
+
+ public void setHasCreationDate(boolean hasCreationDate) {
+ this.hasCreationDate = hasCreationDate;
+ }
+
+ public Date getCreationDate() {
+ if (hasCreationDate) {
+ return ntfsTimeToJavaTime(creationDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setCreationDate(long ntfsCreationDate) {
+ this.creationDate = ntfsCreationDate;
+ }
+
+ public void setCreationDate(Date creationDate) {
+ this.creationDate = javaTimeToNtfsTime(creationDate);
+ }
+
+ public boolean getHasLastModifiedDate() {
+ return hasLastModifiedDate;
+ }
+
+ public void setHasLastModifiedDate(boolean hasLastModifiedDate) {
+ this.hasLastModifiedDate = hasLastModifiedDate;
+ }
+
+ public Date getLastModifiedDate() {
+ if (hasLastModifiedDate) {
+ return ntfsTimeToJavaTime(lastModifiedDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setLastModifiedDate(long ntfsLastModifiedDate) {
+ this.lastModifiedDate = ntfsLastModifiedDate;
+ }
+
+ public void setLastModifiedDate(Date lastModifiedDate) {
+ this.lastModifiedDate = javaTimeToNtfsTime(lastModifiedDate);
+ }
+
+ public boolean getHasAcessDate() {
+ return hasAcessDate;
+ }
+
+ public void setHasAcessDate(boolean hasAcessDate) {
+ this.hasAcessDate = hasAcessDate;
+ }
+
+ public Date getAccessDate() {
+ if (hasAcessDate) {
+ return ntfsTimeToJavaTime(accessDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setAccessDate(long ntfsAccessDate) {
+ this.accessDate = ntfsAccessDate;
+ }
+
+ public void setAccessDate(Date accessDate) {
+ this.accessDate = javaTimeToNtfsTime(accessDate);
+ }
+
+ public boolean getHasWindowsAttributes() {
+ return hasWindowsAttributes;
+ }
+
+ public void setHasWindowsAttributes(boolean hasWindowsAttributes) {
+ this.hasWindowsAttributes = hasWindowsAttributes;
+ }
+
+ public int getWindowsAttributes() {
+ return windowsAttributes;
+ }
+
+ public void setWindowsAttributes(int windowsAttributes) {
+ this.windowsAttributes = windowsAttributes;
+ }
+
+ public boolean getHasCrc() {
+ return hasCrc;
+ }
+
+ public void setHasCrc(boolean hasCrc) {
+ this.hasCrc = hasCrc;
+ }
+
+ public int getCrc() {
+ return crc;
+ }
+
+ public void setCrc(int crc) {
+ this.crc = crc;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public void setSize(long size) {
+ this.size = size;
+ }
+
+ /**
+ * Converts NTFS time (100 nanosecond units since 1 January 1601)
+ * to Java time.
+ * @param ntfsTime the NTFS time in 100 nanosecond units
+ * @return the Java time
+ */
+ public static Date ntfsTimeToJavaTime(final long ntfsTime) {
+ final Calendar ntfsEpoch = Calendar.getInstance();
+ ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0"));
+ ntfsEpoch.set(1601, 0, 1, 0, 0, 0);
+ ntfsEpoch.set(Calendar.MILLISECOND, 0);
+ final long realTime = ntfsEpoch.getTimeInMillis() + (ntfsTime / (10*1000));
+ return new Date(realTime);
+ }
+
+ /**
+ * Converts Java time to NTFS time.
+ * @param date the Java time
+ * @return the NTFS time
+ */
+ public static long javaTimeToNtfsTime(final Date date) {
+ final Calendar ntfsEpoch = Calendar.getInstance();
+ ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0"));
+ ntfsEpoch.set(1601, 0, 1, 0, 0, 0);
+ ntfsEpoch.set(Calendar.MILLISECOND, 0);
+ return ((date.getTime() - ntfsEpoch.getTimeInMillis())* 1000 * 10);
+ }
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
new file mode 100644
index 0000000..0ed381e
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -0,0 +1,1011 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.zip.CRC32;
+
+import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
+import org.tukaani.xz.LZMA2InputStream;
+
+/**
+ * Reads a 7z file, using RandomAccessFile under
+ * the covers.
+ * <p>
+ * The 7z file format is a flexible container
+ * that can contain many compression types, but
+ * at the moment only Copy and LZMA2 are
+ * supported, and archive header compression
+ * (which always uses the unsupported LZMA
+ * compression) isn't. So the only archives
+ * that can be read are the following:
+ * <pre>
+ * 7z -mhc=off -mx=0 archive.7z files
+ * 7z -mhc=off -m0=LZMA2 archive.7z files
+ * </pre>
+ * <p>
+ * The format is very Windows/Intel specific,
+ * so it uses little-endian byte order,
+ * doesn't store user/group or permission bits,
+ * and represents times using NTFS timestamps
+ * (100 nanosecond units since 1 January 1601).
+ * Hence the official tools recommend against
+ * using it for backup purposes on *nix, and
+ * recommend .tar.7z or .tar.lzma or .tar.xz
+ * instead.
+ * <p>
+ * Both the header and file contents may be
+ * compressed and/or encrypted. With both
+ * encrypted, neither file names nor file
+ * contents can be read, but the use of
+ * encryption isn't plausibly deniable.
+ *
+ * @NotThreadSafe
+ */
+public class SevenZFile {
+ private static final boolean DEBUG = false;
+ private static final int SIGNATURE_HEADER_SIZE = 32;
+ private RandomAccessFile file;
+ private final Archive archive;
+ private int currentEntryIndex = -1;
+ private int currentFolderIndex = -1;
+ private InputStream currentFolderInputStream = null;
+ private InputStream currentEntryInputStream = null;
+
+ private static final byte[] sevenZSignature = {
+ (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
+ };
+
+ public SevenZFile(final File filename) throws IOException {
+ boolean succeeded = false;
+ this.file = new RandomAccessFile(filename, "r");
+ try {
+ archive = readHeaders();
+ succeeded = true;
+ } finally {
+ if (!succeeded) {
+ this.file.close();
+ }
+ }
+ }
+
+ public void close() {
+ if (file != null) {
+ try {
+ file.close();
+ } catch (IOException ignored) { // NOPMD
+ }
+ file = null;
+ }
+ }
+
+ private static void debug(String str) {
+ if (DEBUG) {
+ System.out.println(str);
+ }
+ }
+
+ private static void debug(String fmt, Object... args) {
+ if (DEBUG) {
+ System.out.format(fmt, args);
+ }
+ }
+
+ public SevenZArchiveEntry getNextEntry() throws IOException {
+ if (currentEntryIndex >= (archive.files.length - 1)) {
+ return null;
+ }
+ ++currentEntryIndex;
+ final SevenZArchiveEntry entry = archive.files[currentEntryIndex];
+ buildDecodingStream();
+ return entry;
+ }
+
+ private Archive readHeaders() throws IOException {
+ debug("SignatureHeader");
+
+ final byte[] signature = new byte[6];
+ file.readFully(signature);
+ if (!Arrays.equals(signature, sevenZSignature)) {
+ throw new IOException("Bad 7z signature");
+ }
+ // 7zFormat.txt has it wrong - it's first major then minor
+ final byte archiveVersionMajor = file.readByte();
+ final byte archiveVersionMinor = file.readByte();
+ debug(" archiveVersion major=%d, minor=%d\n",
+ archiveVersionMajor, archiveVersionMinor);
+ if (archiveVersionMajor != 0) {
+ throw new IOException(String.format("Unsupported 7z version (%d,%d)",
+ archiveVersionMajor, archiveVersionMinor));
+ }
+
+ final int startHeaderCrc = Integer.reverseBytes(file.readInt());
+ final StartHeader startHeader = readStartHeader(startHeaderCrc);
+
+ final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
+ if (nextHeaderSizeInt != startHeader.nextHeaderSize) {
+ throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize);
+ }
+ file.seek(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
+ final byte[] nextHeader = new byte[nextHeaderSizeInt];
+ file.readFully(nextHeader);
+ final CRC32 crc = new CRC32();
+ crc.update(nextHeader);
+ if (startHeader.nextHeaderCrc != (int) crc.getValue()) {
+ throw new IOException("NextHeader CRC mismatch");
+ }
+
+ final Archive archive = new Archive();
+ final ByteArrayInputStream byteStream = new ByteArrayInputStream(nextHeader);
+ final DataInputStream nextHeaderInputStream = new DataInputStream(
+ byteStream);
+ int nid = nextHeaderInputStream.readUnsignedByte();
+ if (nid == NID.kEncodedHeader) {
+ readEncodedHeader(nextHeaderInputStream, archive);
+ nid = nextHeaderInputStream.readUnsignedByte();
+ }
+ if (nid == NID.kHeader) {
+ readHeader(nextHeaderInputStream, archive);
+ }
+ return archive;
+ }
+
+ private StartHeader readStartHeader(final int startHeaderCrc) throws IOException {
+ final StartHeader startHeader = new StartHeader();
+ DataInputStream dataInputStream = null;
+ try {
+ dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
+ new BoundedRandomAccessFileInputStream(20), 20, startHeaderCrc));
+ startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
+ startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
+ startHeader.nextHeaderCrc = Integer.reverseBytes(dataInputStream.readInt());
+ return startHeader;
+ } finally {
+ if (dataInputStream != null) {
+ dataInputStream.close();
+ }
+ }
+ }
+
+ private void readHeader(final DataInput header, final Archive archive) throws IOException {
+ debug("Header");
+
+ int nid = header.readUnsignedByte();
+
+ if (nid == NID.kArchiveProperties) {
+ readArchiveProperties(header);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kAdditionalStreamsInfo) {
+ throw new IOException("Additional streams unsupported");
+ //nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kMainStreamsInfo) {
+ readStreamsInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kFilesInfo) {
+ readFilesInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated header");
+ }
+ }
+
+ private void readArchiveProperties(final DataInput input) throws IOException {
+ // FIXME: the reference implementation just throws them away?
+ debug("ArchiveProperties");
+
+ int nid = input.readUnsignedByte();
+ while (nid != NID.kEnd) {
+ final long propertySize = readUint64(input);
+ final byte[] property = new byte[(int)propertySize];
+ input.readFully(property);
+ nid = input.readUnsignedByte();
+ }
+ }
+
+ private void readEncodedHeader(final DataInputStream header, final Archive archive) throws IOException {
+ debug("EncodedHeader");
+
+ readStreamsInfo(header, archive);
+
+ // FIXME: and decompress it etc.
+
+ throw new IOException("LZMA compression unsupported, so files with compressed header cannot be read");
+ // FIXME: this extracts the header to an LZMA file which can then be
+ // manually decompressed.
+// long offset = SIGNATURE_HEADER_SIZE + archive.packPos;
+// file.seek(offset);
+// long unpackSize = archive.folders[0].getUnpackSize();
+// byte[] packed = new byte[(int)archive.packSizes[0]];
+// file.readFully(packed);
+//
+// FileOutputStream fos = new FileOutputStream(new File("/tmp/encodedHeader.7z"));
+// fos.write(archive.folders[0].coders[0].properties);
+// // size - assuming < 256
+// fos.write((int)(unpackSize & 0xff));
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(packed);
+// fos.close();
+ }
+
+ private void readStreamsInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("StreamsInfo");
+
+ int nid = header.readUnsignedByte();
+
+ if (nid == NID.kPackInfo) {
+ readPackInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kUnpackInfo) {
+ readUnpackInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kSubStreamsInfo) {
+ readSubStreamsInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated StreamsInfo");
+ }
+ }
+
+ private void readPackInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("PackInfo");
+
+ archive.packPos = readUint64(header);
+ final long numPackStreams = readUint64(header);
+ debug(" " + numPackStreams + " pack streams");
+
+ int nid = header.readUnsignedByte();
+ if (nid == NID.kSize) {
+ archive.packSizes = new long[(int)numPackStreams];
+ for (int i = 0; i < archive.packSizes.length; i++) {
+ archive.packSizes[i] = readUint64(header);
+ debug(" pack size %d is %d\n", i, archive.packSizes[i]);
+ }
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kCRC) {
+ archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams);
+ archive.packCrcs = new int[(int)numPackStreams];
+ for (int i = 0; i < (int)numPackStreams; i++) {
+ if (archive.packCrcsDefined.get(i)) {
+ archive.packCrcs[i] = Integer.reverseBytes(header.readInt());
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated PackInfo (" + nid + ")");
+ }
+ }
+
+ private void readUnpackInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("UnpackInfo");
+
+ int nid = header.readUnsignedByte();
+ if (nid != NID.kFolder) {
+ throw new IOException("Expected kFolder, got " + nid);
+ }
+ final long numFolders = readUint64(header);
+ debug(" " + numFolders + " folders");
+ final Folder[] folders = new Folder[(int)numFolders];
+ archive.folders = folders;
+ final int external = header.readUnsignedByte();
+ if (external != 0) {
+ throw new IOException("External unsupported");
+ } else {
+ for (int i = 0; i < (int)numFolders; i++) {
+ folders[i] = readFolder(header);
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ if (nid != NID.kCodersUnpackSize) {
+ throw new IOException("Expected kCodersUnpackSize, got " + nid);
+ }
+ for (final Folder folder : folders) {
+ folder.unpackSizes = new long[(int)folder.totalOutputStreams];
+ for (int i = 0; i < folder.totalOutputStreams; i++) {
+ folder.unpackSizes[i] = readUint64(header);
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ if (nid == NID.kCRC) {
+ final BitSet crcsDefined = readAllOrBits(header, (int)numFolders);
+ for (int i = 0; i < (int)numFolders; i++) {
+ if (crcsDefined.get(i)) {
+ folders[i].hasCrc = true;
+ folders[i].crc = Integer.reverseBytes(header.readInt());
+ } else {
+ folders[i].hasCrc = false;
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated UnpackInfo");
+ }
+ }
+
+ private void readSubStreamsInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("SubStreamsInfo");
+
+ for (final Folder folder : archive.folders) {
+ folder.numUnpackSubStreams = 1;
+ }
+ int totalUnpackStreams = archive.folders.length;
+
+ int nid = header.readUnsignedByte();
+ if (nid == NID.kNumUnpackStream) {
+ totalUnpackStreams = 0;
+ for (final Folder folder : archive.folders) {
+ final long numStreams = readUint64(header);
+ folder.numUnpackSubStreams = (int)numStreams;
+ totalUnpackStreams += numStreams;
+ }
+ nid = header.readUnsignedByte();
+ }
+
+ final SubStreamsInfo subStreamsInfo = new SubStreamsInfo();
+ subStreamsInfo.unpackSizes = new long[(int)totalUnpackStreams];
+ subStreamsInfo.hasCrc = new BitSet((int)totalUnpackStreams);
+ subStreamsInfo.crcs = new int[(int)totalUnpackStreams];
+
+ int nextUnpackStream = 0;
+ for (final Folder folder : archive.folders) {
+ if (folder.numUnpackSubStreams == 0) {
+ continue;
+ }
+ long sum = 0;
+ if (nid == NID.kSize) {
+ for (int i = 0; i < (folder.numUnpackSubStreams - 1); i++) {
+ final long size = readUint64(header);
+ subStreamsInfo.unpackSizes[nextUnpackStream++] = size;
+ sum += size;
+ }
+ }
+ subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum;
+ }
+ if (nid == NID.kSize) {
+ nid = header.readUnsignedByte();
+ }
+
+ int numDigests = 0;
+ for (final Folder folder : archive.folders) {
+ if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) {
+ numDigests += folder.numUnpackSubStreams;
+ }
+ }
+
+ if (nid == NID.kCRC) {
+ final BitSet hasMissingCrc = readAllOrBits(header, (int)numDigests);
+ final int[] missingCrcs = new int[(int)numDigests];
+ for (int i = 0; i < (int)numDigests; i++) {
+ if (hasMissingCrc.get(i)) {
+ missingCrcs[i] = Integer.reverseBytes(header.readInt());
+ }
+ }
+ int nextCrc = 0;
+ int nextMissingCrc = 0;
+ for (final Folder folder: archive.folders) {
+ if (folder.numUnpackSubStreams == 1 && folder.hasCrc) {
+ subStreamsInfo.hasCrc.set(nextCrc, true);
+ subStreamsInfo.crcs[nextCrc] = folder.crc;
+ ++nextCrc;
+ } else {
+ for (int i = 0; i < folder.numUnpackSubStreams; i++) {
+ subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc));
+ subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc];
+ ++nextCrc;
+ ++nextMissingCrc;
+ }
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated SubStreamsInfo");
+ }
+
+ archive.subStreamsInfo = subStreamsInfo;
+ }
+
+ private Folder readFolder(final DataInput header) throws IOException {
+ final Folder folder = new Folder();
+
+ final long numCoders = readUint64(header);
+ final Coder[] coders = new Coder[(int)numCoders];
+ long totalInStreams = 0;
+ long totalOutStreams = 0;
+ for (int i = 0; i < coders.length; i++) {
+ coders[i] = new Coder();
+ int bits = header.readUnsignedByte();
+ final int idSize = bits & 0xf;
+ final boolean isSimple = ((bits & 0x10) == 0);
+ final boolean hasAttributes = ((bits & 0x20) != 0);
+ final boolean moreAlternativeMethods = ((bits & 0x80) != 0);
+
+ coders[i].decompressionMethodId = new byte[idSize];
+ header.readFully(coders[i].decompressionMethodId);
+ if (isSimple) {
+ coders[i].numInStreams = 1;
+ coders[i].numOutStreams = 1;
+ } else {
+ coders[i].numInStreams = readUint64(header);
+ coders[i].numOutStreams = readUint64(header);
+ }
+ totalInStreams += coders[i].numInStreams;
+ totalOutStreams += coders[i].numOutStreams;
+ if (hasAttributes) {
+ final long propertiesSize = readUint64(header);
+ coders[i].properties = new byte[(int)propertiesSize];
+ header.readFully(coders[i].properties);
+ }
+ if (DEBUG) {
+ final StringBuilder methodStr = new StringBuilder();
+ for (final byte b : coders[i].decompressionMethodId) {
+ methodStr.append(String.format("%02X", 0xff & b));
+ }
+ debug(" coder entry %d numInStreams=%d, numOutStreams=%d, method=%s, properties=%s\n", i,
+ coders[i].numInStreams, coders[i].numOutStreams, methodStr.toString(),
+ Arrays.toString(coders[i].properties));
+ }
+ // would need to keep looping as above:
+ while (moreAlternativeMethods) {
+ throw new IOException("Alternative methods are unsupported, please report. " +
+ "The reference implementation doesn't support them either.");
+ }
+ }
+ folder.coders = coders;
+ folder.totalInputStreams = totalInStreams;
+ folder.totalOutputStreams = totalOutStreams;
+
+ if (totalOutStreams == 0) {
+ throw new IOException("Total output streams can't be 0");
+ }
+ final long numBindPairs = totalOutStreams - 1;
+ final BindPair[] bindPairs = new BindPair[(int)numBindPairs];
+ for (int i = 0; i < bindPairs.length; i++) {
+ bindPairs[i] = new BindPair();
+ bindPairs[i].inIndex = readUint64(header);
+ bindPairs[i].outIndex = readUint64(header);
+ debug(" bind pair in=%d out=%d\n", bindPairs[i].inIndex, bindPairs[i].outIndex);
+ }
+ folder.bindPairs = bindPairs;
+
+ if (totalInStreams < numBindPairs) {
+ throw new IOException("Total input streams can't be less than the number of bind pairs");
+ }
+ final long numPackedStreams = totalInStreams - numBindPairs;
+ final long packedStreams[] = new long[(int)numPackedStreams];
+ if (numPackedStreams == 1) {
+ int i;
+ for (i = 0; i < (int)totalInStreams; i++) {
+ if (folder.findBindPairForInStream(i) < 0) {
+ break;
+ }
+ }
+ if (i == (int)totalInStreams) {
+ throw new IOException("Couldn't find stream's bind pair index");
+ }
+ packedStreams[0] = i;
+ } else {
+ for (int i = 0; i < (int)numPackedStreams; i++) {
+ packedStreams[i] = readUint64(header);
+ }
+ }
+ folder.packedStreams = packedStreams;
+
+ return folder;
+ }
+
+ private BitSet readAllOrBits(final DataInput header, final int size) throws IOException {
+ final int areAllDefined = header.readUnsignedByte();
+ final BitSet bits;
+ if (areAllDefined != 0) {
+ bits = new BitSet(size);
+ for (int i = 0; i < size; i++) {
+ bits.set(i, true);
+ }
+ } else {
+ bits = readBits(header, size);
+ }
+ return bits;
+ }
+
+ private BitSet readBits(final DataInput header, final int size) throws IOException {
+ final BitSet bits = new BitSet(size);
+ int mask = 0;
+ int cache = 0;
+ for (int i = 0; i < size; i++) {
+ if (mask == 0) {
+ mask = 0x80;
+ cache = header.readUnsignedByte();
+ }
+ bits.set(i, (cache & mask) != 0);
+ mask >>>= 1;
+ }
+ return bits;
+ }
+
/**
 * Reads the FilesInfo section: per-file metadata (names, empty-stream and
 * empty-file flags, timestamps, Windows attributes) for every entry, then
 * fills in sizes/CRCs from the previously-read sub-streams info and
 * derives the stream map.
 *
 * @param header  the header input, positioned at the FilesInfo record
 * @param archive the archive being populated; archive.subStreamsInfo is
 *                read here and is assumed to be non-null — TODO confirm
 *                headers always carry it when there are non-empty files
 * @throws IOException on malformed or unsupported property records
 */
private void readFilesInfo(final DataInput header, final Archive archive) throws IOException {
    debug("FilesInfo");

    final long numFiles = readUint64(header);
    final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles];
    for (int i = 0; i < files.length; i++) {
        files[i] = new SevenZArchiveEntry();
    }
    // Entries without a packed stream (directories / empty files), which of
    // those are empty files rather than directories, and which are "anti"
    // items. The latter two vectors are indexed over the empty-stream
    // entries only, hence the cardinality() sizing below.
    BitSet isEmptyStream = null;
    BitSet isEmptyFile = null;
    BitSet isAnti = null;
    // Property records follow until a zero type byte terminates the list.
    while (true) {
        final int propertyType = header.readUnsignedByte();
        if (propertyType == 0) {
            break;
        }
        // Size in bytes of this property record; the parsers below consume
        // exactly that many bytes themselves, so it is only checked, never
        // used for skipping (see the FIXME in the default case).
        long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                debug("  kEmptyStream");
                isEmptyStream = readBits(header, files.length);
                break;
            }
            case NID.kEmptyFile: {
                // NOTE(review): assumes kEmptyStream has already been seen;
                // a header with kEmptyFile first would NPE here.
                debug("  kEmptyFile");
                isEmptyFile = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kAnti: {
                debug("  kAnti");
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                debug("  kNames");
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Not implemented");
                } else {
                    // Names are NUL-terminated UTF-16LE strings packed back
                    // to back; total length (size minus the external byte)
                    // must therefore be even.
                    if (((size - 1) & 1) != 0) {
                        throw new IOException("File names length invalid");
                    }
                    final byte[] names = new byte[(int)(size - 1)];
                    header.readFully(names);
                    int nextFile = 0;
                    int nextName = 0;
                    for (int i = 0; i < names.length; i += 2) {
                        // A 16-bit NUL terminates the current name.
                        if (names[i] == 0 && names[i+1] == 0) {
                            files[nextFile++].setName(new String(names, nextName, i-nextName, "UTF-16LE"));
                            nextName = i + 2;
                        }
                    }
                    // Every byte must belong to a name and every file must
                    // have received one.
                    if (nextName != names.length || nextFile != files.length) {
                        throw new IOException("Error parsing file names");
                    }
                }
                break;
            }
            case NID.kCTime: {
                debug("  kCreationTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasCreationDate(timesDefined.get(i));
                        if (files[i].getHasCreationDate()) {
                            // Stored little-endian; readLong is big-endian,
                            // hence the byte reversal.
                            files[i].setCreationDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kATime: {
                debug("  kLastAccessTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasAcessDate(timesDefined.get(i));
                        if (files[i].getHasAcessDate()) {
                            files[i].setAccessDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kMTime: {
                debug("  kLastWriteTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasLastModifiedDate(timesDefined.get(i));
                        if (files[i].getHasLastModifiedDate()) {
                            files[i].setLastModifiedDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                debug("  kWinAttributes");
                final BitSet attributesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasWindowsAttributes(attributesDefined.get(i));
                        if (files[i].getHasWindowsAttributes()) {
                            // Little-endian 32-bit value, see kCTime above.
                            files[i].setWindowsAttributes(Integer.reverseBytes(header.readInt()));
                        }
                    }
                }
                break;
            }
            case NID.kStartPos: {
                debug("  kStartPos");
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                debug("  kDummy");
                throw new IOException("kDummy is unsupported, please report");
            }

            default: {
                throw new IOException("Unknown property " + propertyType);
                // FIXME: Should actually:
                //header.skipBytes((int)size);
            }
        }
    }
    // Second pass: combine the bit vectors with the sub-streams info.
    // Non-empty files consume sizes/CRCs in order; empty-stream entries
    // are classified as directories, empty files or anti items.
    int nonEmptyFileCounter = 0;
    int emptyFileCounter = 0;
    for (int i = 0; i < files.length; i++) {
        files[i].setHasStream((isEmptyStream == null) ? true : !isEmptyStream.get(i));
        if (files[i].hasStream()) {
            files[i].setDirectory(false);
            files[i].setAntiItem(false);
            files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
            files[i].setCrc(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
            files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
            ++nonEmptyFileCounter;
        } else {
            // No kEmptyFile vector means every empty-stream entry is a
            // directory.
            files[i].setDirectory((isEmptyFile == null) ? true : !isEmptyFile.get(emptyFileCounter));
            files[i].setAntiItem((isAnti == null) ? false : isAnti.get(emptyFileCounter));
            files[i].setHasCrc(false);
            files[i].setSize(0);
            ++emptyFileCounter;
        }
    }
    archive.files = files;
    calculateStreamMap(archive);
}
+
/**
 * Derives the cross-reference tables between folders, pack streams and
 * files, storing them in {@code archive.streamMap}.
 *
 * Must run after folders, pack sizes and files have all been read.
 *
 * @param archive the archive whose folders/packSizes/files are populated
 * @throws IOException if files with streams remain but no folder with
 *         unpacked sub-streams is left to assign them to
 */
private void calculateStreamMap(final Archive archive) throws IOException {
    final StreamMap streamMap = new StreamMap();

    // Pack streams are stored folder by folder; record where each
    // folder's run of pack streams begins.
    int nextFolderPackStreamIndex = 0;
    streamMap.folderFirstPackStreamIndex = new int[archive.folders.length];
    for (int i = 0; i < archive.folders.length; i++) {
        streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
        nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
    }

    // Prefix sums of the pack sizes give each pack stream's byte offset
    // relative to the first pack stream.
    long nextPackStreamOffset = 0;
    streamMap.packStreamOffsets = new long[archive.packSizes.length];
    for (int i = 0; i < archive.packSizes.length; i++) {
        streamMap.packStreamOffsets[i] = nextPackStreamOffset;
        nextPackStreamOffset += archive.packSizes[i];
    }

    // Walk the files in order, handing each file-with-stream to the next
    // folder that still has unpacked sub-streams available.
    streamMap.folderFirstFileIndex = new int[archive.folders.length];
    streamMap.fileFolderIndex = new int[archive.files.length];
    int nextFolderIndex = 0;
    int nextFolderUnpackStreamIndex = 0;
    for (int i = 0; i < archive.files.length; i++) {
        // Stream-less files before the first assigned folder belong to no
        // folder at all.
        if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) {
            streamMap.fileFolderIndex[i] = -1;
            continue;
        }
        if (nextFolderUnpackStreamIndex == 0) {
            // Starting a new folder: skip folders with no sub-streams,
            // recording this file as each one's first file on the way.
            for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
                streamMap.folderFirstFileIndex[nextFolderIndex] = i;
                if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
                    break;
                }
            }
            if (nextFolderIndex >= archive.folders.length) {
                throw new IOException("Too few folders in archive");
            }
        }
        streamMap.fileFolderIndex[i] = nextFolderIndex;
        if (!archive.files[i].hasStream()) {
            continue;
        }
        // Consume one sub-stream; when the folder is exhausted, move on.
        ++nextFolderUnpackStreamIndex;
        if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
            ++nextFolderIndex;
            nextFolderUnpackStreamIndex = 0;
        }
    }

    archive.streamMap = streamMap;
}
+
/**
 * Sets up {@code currentEntryInputStream} for the current entry
 * ({@code currentEntryIndex}).
 *
 * Entries without a stream get an empty stream. When the entry lives in
 * the same folder as the previous one, the existing folder decoder stream
 * is reused (after draining the previous entry's remaining bytes);
 * otherwise a fresh decoder stack is built for the entry's folder. The
 * per-entry stream is bounded to the entry size and, when a CRC is
 * present, wrapped in a CRC-verifying stream.
 *
 * @throws IOException on decoder setup or drain failure
 */
private void buildDecodingStream() throws IOException {
    final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex];
    if (folderIndex < 0) {
        // Directory / empty file: no packed data to decode.
        currentEntryInputStream = new BoundedInputStream(
                new ByteArrayInputStream(new byte[0]), 0);
        return;
    }
    if (currentFolderIndex == folderIndex) {
        // need to advance the folder input stream past the current file
        drainPreviousEntry();
    } else {
        currentFolderIndex = folderIndex;
        if (currentFolderInputStream != null) {
            currentFolderInputStream.close();
            currentFolderInputStream = null;
        }

        final Folder folder = archive.folders[folderIndex];
        final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
        // Absolute file position of the folder's packed data: signature
        // header, then packPos, then the pack stream's relative offset.
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
                archive.streamMap.packStreamOffsets[firstPackStreamIndex];
        currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex);
    }
    final SevenZArchiveEntry file = archive.files[currentEntryIndex];
    final InputStream fileStream = new BoundedInputStream(
            currentFolderInputStream, file.getSize());
    if (file.getHasCrc()) {
        currentEntryInputStream = new CRC32VerifyingInputStream(
                fileStream, file.getSize(), file.getCrc());
    } else {
        currentEntryInputStream = fileStream;
    }

}
+
+ private void drainPreviousEntry() throws IOException {
+ if (currentEntryInputStream != null) {
+ final byte[] buffer = new byte[64*1024];
+ while (currentEntryInputStream.read(buffer) >= 0) { // NOPMD
+ }
+ currentEntryInputStream.close();
+ currentEntryInputStream = null;
+ }
+ }
+
/**
 * Builds the decoder input stream for one folder's packed data.
 *
 * Only single-coder folders are supported; the coder's method id selects
 * the decompressor (currently Copy and LZMA2). The result is wrapped in a
 * CRC check when the folder carries a CRC.
 *
 * @param folder               the folder to decode
 * @param folderOffset         absolute file offset of the folder's packed data
 * @param firstPackStreamIndex index of the folder's first pack stream
 *                             (used to look up its packed size)
 * @return the decoded (unpacked) stream for the folder
 * @throws IOException for multi-coder folders or unsupported methods
 */
private InputStream buildDecoderStack(final Folder folder, final long folderOffset,
        final int firstPackStreamIndex) throws IOException {
    InputStream inputStreamStack = null;
    for (int i = 0; i < folder.coders.length; i++) {
        // Chained codecs are not implemented yet; bail out on the second.
        if (i > 0) {
            throw new IOException("Unsupported multi-codec stream");
        }
        // Position the shared RandomAccessFile at the packed data.
        file.seek(folderOffset);
        if (folder.coders[i].decompressionMethodId.length == 1 &&
                folder.coders[i].decompressionMethodId[0] == 0) {
            // 00 - Copy
            inputStreamStack = new BoundedRandomAccessFileInputStream(
                    archive.packSizes[firstPackStreamIndex]);
            // FIXME: LZMA is the default coder yet ironically we don't have it.
//        } else if (folder.coders[i].decompressionMethodId.length == 3 &&
//                folder.coders[i].decompressionMethodId[0] == 3 &&
//                folder.coders[i].decompressionMethodId[1] == 1 &&
//                folder.coders[i].decompressionMethodId[2] == 1) {
//            // 03.. - 7z
//            //   01 - LZMA
//            //     01 - Version
        } else if (folder.coders[i].decompressionMethodId.length == 1 &&
                folder.coders[i].decompressionMethodId[0] == 0x21) {
            // 21 - LZMA2
            // The single property byte encodes the dictionary size; only
            // the low 6 bits may be used, and 40 is the defined maximum.
            final int dictionarySizeBits = 0xff & folder.coders[i].properties[0];
            if ((dictionarySizeBits & (~0x3f)) != 0) {
                throw new IOException("Unsupported LZMA2 property bits");
            }
            if (dictionarySizeBits > 40) {
                throw new IOException("Dictionary larger than 4GiB maximum size");
            }
            final int dictionarySize;
            if (dictionarySizeBits == 40) {
                // Special case: full 4 GiB - 1 dictionary.
                dictionarySize = 0xFFFFffff;
            } else {
                // (2 | lowest bit) scaled by a power of two derived from the
                // remaining bits — the standard LZMA2 dictionary-size mapping.
                dictionarySize = (2 | (dictionarySizeBits & 0x1)) << (dictionarySizeBits / 2 + 11);
            }
            inputStreamStack = new LZMA2InputStream(
                    new BoundedRandomAccessFileInputStream(
                            archive.packSizes[firstPackStreamIndex]),
                    dictionarySize);
            // FIXME: gives corrupt output:
//        } else if (folder.coders[i].decompressionMethodId.length == 3 &&
//                folder.coders[i].decompressionMethodId[0] == 0x4 &&
//                folder.coders[i].decompressionMethodId[1] == 0x1 &&
//                folder.coders[i].decompressionMethodId[2] == 0x8) {
//            // 04.. - Misc
//            //   00 - Reserved
//            //   01 - Zip
//            //     00 - Copy (not used). Use {00} instead
//            //     01 - Shrink
//            //     06 - Implode
//            //     08 - Deflate
//            return new DeflaterInputStream(
//                    new BoundedRandomAccessFileInputStream(
//                            archive.packSizes[firstPackStreamIndex]),
//                    new Deflater(Deflater.DEFAULT_COMPRESSION, true));
        } else {
            throw new IOException("Unsupported compression method " +
                    Arrays.toString(folder.coders[i].decompressionMethodId));
        }
    }
    // Verify the whole folder's unpacked data when a folder-level CRC exists.
    if (folder.hasCrc) {
        return new CRC32VerifyingInputStream(inputStreamStack,
                folder.getUnpackSize(), folder.crc);
    } else {
        return inputStreamStack;
    }
}
+
/**
 * Reads one byte of data from the current entry.
 *
 * NOTE(review): throws NullPointerException if no entry is current yet —
 * callers must obtain an entry first; confirm against getNextEntry().
 *
 * @return the byte read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read() throws IOException {
    return currentEntryInputStream.read();
}
+
/**
 * Reads data into the given array from the current entry.
 *
 * @param b the buffer to fill
 * @return the number of bytes read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read(byte[] b) throws IOException {
    // Delegates to the three-argument overload.
    return read(b, 0, b.length);
}
+
/**
 * Reads up to {@code len} bytes of the current entry into {@code b}
 * starting at {@code off}.
 *
 * @param b   the buffer to fill
 * @param off start offset within the buffer
 * @param len maximum number of bytes to read
 * @return the number of bytes read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read(byte[] b, int off, int len) throws IOException {
    return currentEntryInputStream.read(b, off, len);
}
+
+ private static long readUint64(final DataInput in) throws IOException {
+ int firstByte = in.readUnsignedByte();
+ int mask = 0x80;
+ int value = 0;
+ for (int i = 0; i < 8; i++) {
+ if ((firstByte & mask) == 0) {
+ return value | ((firstByte & (mask - 1)) << (8 * i));
+ }
+ int nextByte = in.readUnsignedByte();
+ value |= (nextByte << (8 * i));
+ mask >>>= 1;
+ }
+ return value;
+ }
+
+ private class BoundedRandomAccessFileInputStream extends InputStream {
+ private long bytesRemaining;
+
+ public BoundedRandomAccessFileInputStream(final long size) {
+ bytesRemaining = size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (bytesRemaining > 0) {
+ --bytesRemaining;
+ return file.read();
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (bytesRemaining == 0) {
+ return -1;
+ }
+ int bytesToRead = len;
+ if (bytesToRead > bytesRemaining) {
+ bytesToRead = (int) bytesRemaining;
+ }
+ final int bytesRead = file.read(b, off, bytesToRead);
+ if (bytesRead >= 0) {
+ bytesRemaining -= bytesRead;
+ }
+ return bytesRead;
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+
+ private static class BoundedInputStream extends InputStream {
+ private InputStream is;
+ private long bytesRemaining;
+
+ public BoundedInputStream(final InputStream is, final long size) {
+ this.is = is;
+ bytesRemaining = size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (bytesRemaining > 0) {
+ --bytesRemaining;
+ return is.read();
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (bytesRemaining == 0) {
+ return -1;
+ }
+ int bytesToRead = len;
+ if (bytesToRead > bytesRemaining) {
+ bytesToRead = (int) bytesRemaining;
+ }
+ final int bytesRead = is.read(b, off, bytesToRead);
+ if (bytesRead >= 0) {
+ bytesRemaining -= bytesRead;
+ }
+ return bytesRead;
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
new file mode 100644
index 0000000..4faa083
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
/**
 * The start header of a 7z archive: locates and checksums the main
 * ("next") header.
 */
class StartHeader {
    /** Offset of the next header (relative base not visible here —
     *  presumably past the signature header; confirm in SevenZFile). */
    long nextHeaderOffset;
    /** Size in bytes of the next header. */
    long nextHeaderSize;
    /** CRC32 of the next header's bytes. */
    int nextHeaderCrc;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java
new file mode 100644
index 0000000..4638acc
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+/// Map between folders, files and streams.
/** Map between folders, files and streams. */
class StreamMap {
    /** The first Archive.packStream index of each folder. */
    int[] folderFirstPackStreamIndex;
    /** Offset to beginning of this pack stream's data, relative to the beginning of the first pack stream. */
    long[] packStreamOffsets;
    /** Index of first file for each folder. */
    int[] folderFirstFileIndex;
    /** Index of folder for each file. */
    int[] fileFolderIndex;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java
new file mode 100644
index 0000000..04fb814
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.BitSet;
+
+/// Properties for non-empty files.
/** Properties for non-empty files. */
class SubStreamsInfo {
    /** Unpacked size of each unpacked stream. */
    long[] unpackSizes;
    /** Whether CRC is present for each unpacked stream. */
    BitSet hasCrc;
    /** CRCs of unpacked streams, if present. */
    int[] crcs;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html
new file mode 100644
index 0000000..6b85bcf
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html
@@ -0,0 +1,24 @@
+<html>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+ <body>
+ <p>Provides classes for reading archives using
+ the 7z format.</p>
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java
new file mode 100644
index 0000000..6377fb6
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.CRC32;
+
/**
 * A stream that verifies the CRC32 of all bytes read, once the expected
 * number of bytes have been consumed; an {@link IOException} is thrown if
 * the checksum does not match.
 *
 * Not thread-safe.
 */
public class CRC32VerifyingInputStream extends InputStream {
    private final InputStream is;
    private long bytesRemaining;
    private final int expectedCrc32;
    private final CRC32 crc32 = new CRC32();

    /**
     * @param is            the stream to read and verify
     * @param size          the number of bytes expected
     * @param expectedCrc32 the CRC32 the first {@code size} bytes must hash to
     */
    public CRC32VerifyingInputStream(final InputStream is, final long size, final int expectedCrc32) {
        this.is = is;
        this.expectedCrc32 = expectedCrc32;
        this.bytesRemaining = size;
    }

    @Override
    public int read() throws IOException {
        if (bytesRemaining <= 0) {
            return -1;
        }
        final int ret = is.read();
        if (ret >= 0) {
            crc32.update(ret);
            --bytesRemaining;
        }
        if (bytesRemaining == 0 && expectedCrc32 != (int) crc32.getValue()) {
            throw new IOException("CRC32 verification failed");
        }
        return ret;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // Fix: previously this overload ignored bytesRemaining entirely, so
        // reads past the expected size were hashed too and could then make
        // the CRC check fail on correct data. Stop at the bound, matching
        // the single-byte read() above.
        if (bytesRemaining <= 0) {
            return -1;
        }
        int bytesToRead = len;
        if (bytesToRead > bytesRemaining) {
            bytesToRead = (int) bytesRemaining;
        }
        final int ret = is.read(b, off, bytesToRead);
        if (ret >= 0) {
            crc32.update(b, off, ret);
            bytesRemaining -= ret;
        }
        if (bytesRemaining <= 0 && expectedCrc32 != (int) crc32.getValue()) {
            throw new IOException("CRC32 verification failed");
        }
        return ret;
    }

    @Override
    public long skip(long n) throws IOException {
        // Can't delegate skipping: every byte must pass through the digest.
        // Fix: honor n (the old version skipped at most one byte per call,
        // which was contract-legal but needlessly slow).
        long skipped = 0;
        while (skipped < n && read() >= 0) {
            ++skipped;
        }
        return skipped;
    }

    @Override
    public void close() throws IOException {
        is.close();
    }
}
diff --git a/src/site/xdoc/examples.xml b/src/site/xdoc/examples.xml
index a48cb31..1ab9da9 100644
--- a/src/site/xdoc/examples.xml
+++ b/src/site/xdoc/examples.xml
@@ -33,7 +33,7 @@
<p>The compressor formats supported are gzip, bzip2, xz and
Pack200, the archiver formats are ar, cpio, dump (read-only),
- tar and zip. Pack200 is a special case as it can only
+ tar, zip and 7z. Pack200 is a special case as it can only
compress JAR files.</p>
</subsection>
@@ -424,6 +424,21 @@
]]></source>
</subsection>
+ <subsection name="7z">
+
+ <p>Uncompressing a given 7z archive (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+SevenZFile sevenZFile = new SevenZFile(new File("archive.7z"));
+SevenZArchiveEntry entry = sevenZFile.getNextEntry();
+byte[] content = new byte[(int) entry.getSize()];
+int offset = 0;
+while (offset < content.length) {
+    offset += sevenZFile.read(content, offset, content.length - offset);
+}
+]]></source>
+ </subsection>
+
</section>
</body>
</document>
diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml
index 10367ae..d2a12f7 100644
--- a/src/site/xdoc/index.xml
+++ b/src/site/xdoc/index.xml
@@ -26,7 +26,8 @@
<section name="Apache Commons Compress™">
<p>
The Apache Commons Compress library defines an API for
- working with ar, cpio, Unix dump, tar, zip, gzip, XZ, Pack200 and bzip2 files.
+ working with ar, cpio, Unix dump, tar, zip, gzip, XZ, Pack200,
+ bzip2 and 7z files.
</p>
<p>
The code in this component has many origins:
@@ -71,7 +72,7 @@
domain <a href="http://tukaani.org/xz/java.html">XZ for
Java</a> library.</p>
- <p>The ar, cpio, dump, tar and zip formats are supported as
+ <p>The ar, cpio, dump, tar, 7z and zip formats are supported as
archivers where the <a href="zip.html">zip</a>
implementation provides capabilities that go beyond the
features found in java.util.zip. As of Commons Compress
diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
new file mode 100644
index 0000000..7ad1e60
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import org.apache.commons.compress.AbstractTestCase;
+
+public class SevenZFileTest extends AbstractTestCase {
+ public void testHelloWorldHeaderCompressionOffCopy() throws Exception {
+ checkHelloWorld("7z-hello-mhc-off-copy.7z");
+ }
+
+ public void testHelloWorldHeaderCompressionOffLZMA2() throws Exception {
+ checkHelloWorld("7z-hello-mhc-off-lzma2.7z");
+ }
+
+ private void checkHelloWorld(final String filename) throws Exception {
+ SevenZFile sevenZFile = new SevenZFile(getFile(filename));
+ try {
+ SevenZArchiveEntry entry = sevenZFile.getNextEntry();
+ assertEquals("Hello world.txt", entry.getName());
+ byte[] contents = new byte[(int)entry.getSize()];
+ int off = 0;
+ while ((off < contents.length)) {
+ int bytesRead = sevenZFile.read(contents, off, contents.length - off);
+ assert(bytesRead >= 0);
+ off += bytesRead;
+ }
+ assertEquals("Hello, world!\n", new String(contents, "UTF-8"));
+ assertNull(sevenZFile.getNextEntry());
+ } finally {
+ sevenZFile.close();
+ }
+ }
+}
diff --git a/src/test/resources/7z-hello-mhc-off-copy.7z b/src/test/resources/7z-hello-mhc-off-copy.7z
new file mode 100644
index 0000000..2cc91da
--- /dev/null
+++ b/src/test/resources/7z-hello-mhc-off-copy.7z
Binary files differ
diff --git a/src/test/resources/7z-hello-mhc-off-lzma2.7z b/src/test/resources/7z-hello-mhc-off-lzma2.7z
new file mode 100644
index 0000000..0fbcd25
--- /dev/null
+++ b/src/test/resources/7z-hello-mhc-off-lzma2.7z
Binary files differ