Add preliminary read-only support for the 7z archive format,
together with Copy and LZMA2 decompression,
some tests, documentation, and Javadocs.
Give attribution as per LEGAL-72.
Jira issue key: COMPRESS-54
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/compress/trunk@1480055 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/NOTICE.txt b/NOTICE.txt
index 9b5b734..6f183f8 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -3,3 +3,11 @@
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
+
+====================
+
+The files in the package org.apache.commons.compress.archivers.sevenz
+were derived from the LZMA SDK, version 9.20 (C/ and CPP/7zip/),
+which has been placed in the public domain:
+
+"LZMA SDK is placed in the public domain." (http://www.7-zip.org/sdk.html)
diff --git a/pom.xml b/pom.xml
index 48815f7..d9a2ed1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
<!-- The description is not indented to make it look better in the release notes -->
<description>
Apache Commons Compress software defines an API for working with compression and archive formats.
-These include: bzip2, gzip, pack200, xz and ar, cpio, jar, tar, zip, dump.
+These include: bzip2, gzip, pack200, xz and ar, cpio, jar, tar, zip, dump, 7z.
</description>
<properties>
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java
new file mode 100644
index 0000000..cd8d895
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.BitSet;
+
+class Archive {
+ /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams.
+ long packPos;
+ /// Size of each packed stream.
+ long[] packSizes;
+    /// Whether each particular packed stream has a CRC.
+ BitSet packCrcsDefined;
+ /// CRCs for each packed stream, valid only if that packed stream has one.
+ int[] packCrcs;
+ /// Properties of solid compression blocks.
+ Folder[] folders;
+ /// Temporary properties for non-empty files (subsumed into the files array later).
+ SubStreamsInfo subStreamsInfo;
+ /// The files and directories in the archive.
+ SevenZArchiveEntry[] files;
+ /// Mapping between folders, files and streams.
+ StreamMap streamMap;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java
new file mode 100644
index 0000000..96e5bab
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+class BindPair {
+ long inIndex;
+ long outIndex;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java
new file mode 100644
index 0000000..cbd271d
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+class Coder {
+ byte[] decompressionMethodId;
+ long numInStreams;
+ long numOutStreams;
+ byte[] properties = null;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java
new file mode 100644
index 0000000..eff28e0
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+/**
+ * The unit of solid compression.
+ */
+class Folder {
+    /// List of coders used in this folder, e.g. one for compression, one for encryption.
+ Coder[] coders;
+ /// Total number of input streams across all coders.
+ long totalInputStreams;
+ /// Total number of output streams across all coders.
+ long totalOutputStreams;
+ /// Mapping between input and output streams.
+ BindPair[] bindPairs;
+    /// Indices of input streams, one per input stream not listed in bindPairs.
+ long[] packedStreams;
+ /// Unpack sizes, per each output stream.
+ long[] unpackSizes;
+ /// Whether the folder has a CRC.
+ boolean hasCrc;
+ /// The CRC, if present.
+ int crc;
+ /// The number of unpack substreams, one per non-empty file in this folder.
+ int numUnpackSubStreams;
+
+ int findBindPairForInStream(final int index) {
+ for (int i = 0; i < bindPairs.length; i++) {
+ if (bindPairs[i].inIndex == index) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ int findBindPairForOutStream(final int index) {
+ for (int i = 0; i < bindPairs.length; i++) {
+ if (bindPairs[i].outIndex == index) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ long getUnpackSize() {
+ if (totalOutputStreams == 0) {
+ return 0;
+ }
+ for (int i = ((int)totalOutputStreams) - 1; i >= 0; i--) {
+ if (findBindPairForOutStream(i) < 0) {
+ return unpackSizes[i];
+ }
+ }
+ return 0;
+ }
+}
+
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java
new file mode 100644
index 0000000..89a813a
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+final class NID {
+ public static final int kEnd = 0x00;
+ public static final int kHeader = 0x01;
+ public static final int kArchiveProperties = 0x02;
+ public static final int kAdditionalStreamsInfo = 0x03;
+ public static final int kMainStreamsInfo = 0x04;
+ public static final int kFilesInfo = 0x05;
+ public static final int kPackInfo = 0x06;
+ public static final int kUnpackInfo = 0x07;
+ public static final int kSubStreamsInfo = 0x08;
+ public static final int kSize = 0x09;
+ public static final int kCRC = 0x0A;
+ public static final int kFolder = 0x0B;
+ public static final int kCodersUnpackSize = 0x0C;
+ public static final int kNumUnpackStream = 0x0D;
+ public static final int kEmptyStream = 0x0E;
+ public static final int kEmptyFile = 0x0F;
+ public static final int kAnti = 0x10;
+ public static final int kName = 0x11;
+ public static final int kCTime = 0x12;
+ public static final int kATime = 0x13;
+ public static final int kMTime = 0x14;
+ public static final int kWinAttributes = 0x15;
+ public static final int kComment = 0x16;
+ public static final int kEncodedHeader = 0x17;
+ public static final int kStartPos = 0x18;
+ public static final int kDummy = 0x19;
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java
new file mode 100644
index 0000000..a13e50e
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.Calendar;
+import java.util.Date;
+import java.util.TimeZone;
+
+import org.apache.commons.compress.archivers.ArchiveEntry;
+
+/**
+ * An entry in a 7z archive.
+ *
+ * @NotThreadSafe
+ */
+public class SevenZArchiveEntry implements ArchiveEntry {
+ private String name;
+ private boolean hasStream;
+ private boolean isDirectory;
+ private boolean isAntiItem;
+ private boolean hasCreationDate;
+ private boolean hasLastModifiedDate;
+ private boolean hasAcessDate;
+ private long creationDate;
+ private long lastModifiedDate;
+ private long accessDate;
+ private boolean hasWindowsAttributes;
+ private int windowsAttributes;
+ private boolean hasCrc;
+ private int crc;
+ private long size;
+
+ public SevenZArchiveEntry() {
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public boolean hasStream() {
+ return hasStream;
+ }
+
+ public void setHasStream(boolean hasStream) {
+ this.hasStream = hasStream;
+ }
+
+ public boolean isDirectory() {
+ return isDirectory;
+ }
+
+ public void setDirectory(boolean isDirectory) {
+ this.isDirectory = isDirectory;
+ }
+
+ /**
+ * Indicates whether this is an "anti-item" used in differential backups,
+ * meaning it should delete the same file from a previous backup.
+ * @return true if it is an anti-item, false otherwise
+ */
+ public boolean isAntiItem() {
+ return isAntiItem;
+ }
+
+ /**
+ * Sets whether this is an "anti-item" used in differential backups,
+ * meaning it should delete the same file from a previous backup.
+     * @param isAntiItem true if it is an anti-item, false otherwise
+ */
+ public void setAntiItem(boolean isAntiItem) {
+ this.isAntiItem = isAntiItem;
+ }
+
+ public boolean getHasCreationDate() {
+ return hasCreationDate;
+ }
+
+ public void setHasCreationDate(boolean hasCreationDate) {
+ this.hasCreationDate = hasCreationDate;
+ }
+
+ public Date getCreationDate() {
+ if (hasCreationDate) {
+ return ntfsTimeToJavaTime(creationDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setCreationDate(long ntfsCreationDate) {
+ this.creationDate = ntfsCreationDate;
+ }
+
+ public void setCreationDate(Date creationDate) {
+ this.creationDate = javaTimeToNtfsTime(creationDate);
+ }
+
+ public boolean getHasLastModifiedDate() {
+ return hasLastModifiedDate;
+ }
+
+ public void setHasLastModifiedDate(boolean hasLastModifiedDate) {
+ this.hasLastModifiedDate = hasLastModifiedDate;
+ }
+
+ public Date getLastModifiedDate() {
+ if (hasLastModifiedDate) {
+ return ntfsTimeToJavaTime(lastModifiedDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setLastModifiedDate(long ntfsLastModifiedDate) {
+ this.lastModifiedDate = ntfsLastModifiedDate;
+ }
+
+ public void setLastModifiedDate(Date lastModifiedDate) {
+ this.lastModifiedDate = javaTimeToNtfsTime(lastModifiedDate);
+ }
+
+ public boolean getHasAcessDate() {
+ return hasAcessDate;
+ }
+
+ public void setHasAcessDate(boolean hasAcessDate) {
+ this.hasAcessDate = hasAcessDate;
+ }
+
+ public Date getAccessDate() {
+ if (hasAcessDate) {
+ return ntfsTimeToJavaTime(accessDate);
+ } else {
+ throw new UnsupportedOperationException(
+ "The entry doesn't have this timestamp");
+ }
+ }
+
+ public void setAccessDate(long ntfsAccessDate) {
+ this.accessDate = ntfsAccessDate;
+ }
+
+ public void setAccessDate(Date accessDate) {
+ this.accessDate = javaTimeToNtfsTime(accessDate);
+ }
+
+ public boolean getHasWindowsAttributes() {
+ return hasWindowsAttributes;
+ }
+
+ public void setHasWindowsAttributes(boolean hasWindowsAttributes) {
+ this.hasWindowsAttributes = hasWindowsAttributes;
+ }
+
+ public int getWindowsAttributes() {
+ return windowsAttributes;
+ }
+
+ public void setWindowsAttributes(int windowsAttributes) {
+ this.windowsAttributes = windowsAttributes;
+ }
+
+ public boolean getHasCrc() {
+ return hasCrc;
+ }
+
+ public void setHasCrc(boolean hasCrc) {
+ this.hasCrc = hasCrc;
+ }
+
+ public int getCrc() {
+ return crc;
+ }
+
+ public void setCrc(int crc) {
+ this.crc = crc;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public void setSize(long size) {
+ this.size = size;
+ }
+
+ /**
+ * Converts NTFS time (100 nanosecond units since 1 January 1601)
+ * to Java time.
+ * @param ntfsTime the NTFS time in 100 nanosecond units
+ * @return the Java time
+ */
+ public static Date ntfsTimeToJavaTime(final long ntfsTime) {
+ final Calendar ntfsEpoch = Calendar.getInstance();
+ ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0"));
+ ntfsEpoch.set(1601, 0, 1, 0, 0, 0);
+ ntfsEpoch.set(Calendar.MILLISECOND, 0);
+ final long realTime = ntfsEpoch.getTimeInMillis() + (ntfsTime / (10*1000));
+ return new Date(realTime);
+ }
+
+ /**
+ * Converts Java time to NTFS time.
+ * @param date the Java time
+ * @return the NTFS time
+ */
+ public static long javaTimeToNtfsTime(final Date date) {
+ final Calendar ntfsEpoch = Calendar.getInstance();
+ ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0"));
+ ntfsEpoch.set(1601, 0, 1, 0, 0, 0);
+ ntfsEpoch.set(Calendar.MILLISECOND, 0);
+ return ((date.getTime() - ntfsEpoch.getTimeInMillis())* 1000 * 10);
+ }
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
new file mode 100644
index 0000000..0ed381e
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -0,0 +1,1011 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.zip.CRC32;
+
+import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
+import org.tukaani.xz.LZMA2InputStream;
+
+/**
+ * Reads a 7z file, using RandomAccessFile under
+ * the covers.
+ * <p>
+ * The 7z file format is a flexible container
+ * that can contain many compression types, but
+ * at the moment only Copy and LZMA2 are
+ * supported, and archive header compression
+ * (which always uses the unsupported LZMA
+ * compression) isn't. So the only archives
+ * that can be read are the following:
+ * <pre>
+ * 7z -mhc=off -mx=0 archive.7z files
+ * 7z -mhc=off -m0=LZMA2 archive.7z files
+ * </pre>
+ * <p>
+ * The format is very Windows/Intel specific,
+ * so it uses little-endian byte order,
+ * doesn't store user/group or permission bits,
+ * and represents times using NTFS timestamps
+ * (100 nanosecond units since 1 January 1601).
+ * Hence the official tools recommend against
+ * using it for backup purposes on *nix, and
+ * recommend .tar.7z or .tar.lzma or .tar.xz
+ * instead.
+ * <p>
+ * Both the header and file contents may be
+ * compressed and/or encrypted. With both
+ * encrypted, neither file names nor file
+ * contents can be read, but the use of
+ * encryption isn't plausibly deniable.
+ *
+ * @NotThreadSafe
+ */
+public class SevenZFile {
+ private static final boolean DEBUG = false;
+ private static final int SIGNATURE_HEADER_SIZE = 32;
+ private RandomAccessFile file;
+ private final Archive archive;
+ private int currentEntryIndex = -1;
+ private int currentFolderIndex = -1;
+ private InputStream currentFolderInputStream = null;
+ private InputStream currentEntryInputStream = null;
+
+ private static final byte[] sevenZSignature = {
+ (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
+ };
+
+ public SevenZFile(final File filename) throws IOException {
+ boolean succeeded = false;
+ this.file = new RandomAccessFile(filename, "r");
+ try {
+ archive = readHeaders();
+ succeeded = true;
+ } finally {
+ if (!succeeded) {
+ this.file.close();
+ }
+ }
+ }
+
+ public void close() {
+ if (file != null) {
+ try {
+ file.close();
+ } catch (IOException ignored) { // NOPMD
+ }
+ file = null;
+ }
+ }
+
+ private static void debug(String str) {
+ if (DEBUG) {
+ System.out.println(str);
+ }
+ }
+
+ private static void debug(String fmt, Object... args) {
+ if (DEBUG) {
+ System.out.format(fmt, args);
+ }
+ }
+
+ public SevenZArchiveEntry getNextEntry() throws IOException {
+ if (currentEntryIndex >= (archive.files.length - 1)) {
+ return null;
+ }
+ ++currentEntryIndex;
+ final SevenZArchiveEntry entry = archive.files[currentEntryIndex];
+ buildDecodingStream();
+ return entry;
+ }
+
+ private Archive readHeaders() throws IOException {
+ debug("SignatureHeader");
+
+ final byte[] signature = new byte[6];
+ file.readFully(signature);
+ if (!Arrays.equals(signature, sevenZSignature)) {
+ throw new IOException("Bad 7z signature");
+ }
+ // 7zFormat.txt has it wrong - it's first major then minor
+ final byte archiveVersionMajor = file.readByte();
+ final byte archiveVersionMinor = file.readByte();
+ debug(" archiveVersion major=%d, minor=%d\n",
+ archiveVersionMajor, archiveVersionMinor);
+ if (archiveVersionMajor != 0) {
+ throw new IOException(String.format("Unsupported 7z version (%d,%d)",
+ archiveVersionMajor, archiveVersionMinor));
+ }
+
+ final int startHeaderCrc = Integer.reverseBytes(file.readInt());
+ final StartHeader startHeader = readStartHeader(startHeaderCrc);
+
+ final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize;
+ if (nextHeaderSizeInt != startHeader.nextHeaderSize) {
+ throw new IOException("cannot handle nextHeaderSize " + startHeader.nextHeaderSize);
+ }
+ file.seek(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
+ final byte[] nextHeader = new byte[nextHeaderSizeInt];
+ file.readFully(nextHeader);
+ final CRC32 crc = new CRC32();
+ crc.update(nextHeader);
+ if (startHeader.nextHeaderCrc != (int) crc.getValue()) {
+ throw new IOException("NextHeader CRC mismatch");
+ }
+
+ final Archive archive = new Archive();
+ final ByteArrayInputStream byteStream = new ByteArrayInputStream(nextHeader);
+ final DataInputStream nextHeaderInputStream = new DataInputStream(
+ byteStream);
+ int nid = nextHeaderInputStream.readUnsignedByte();
+ if (nid == NID.kEncodedHeader) {
+ readEncodedHeader(nextHeaderInputStream, archive);
+ nid = nextHeaderInputStream.readUnsignedByte();
+ }
+ if (nid == NID.kHeader) {
+ readHeader(nextHeaderInputStream, archive);
+ }
+ return archive;
+ }
+
+ private StartHeader readStartHeader(final int startHeaderCrc) throws IOException {
+ final StartHeader startHeader = new StartHeader();
+ DataInputStream dataInputStream = null;
+ try {
+ dataInputStream = new DataInputStream(new CRC32VerifyingInputStream(
+ new BoundedRandomAccessFileInputStream(20), 20, startHeaderCrc));
+ startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong());
+ startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong());
+ startHeader.nextHeaderCrc = Integer.reverseBytes(dataInputStream.readInt());
+ return startHeader;
+ } finally {
+ if (dataInputStream != null) {
+ dataInputStream.close();
+ }
+ }
+ }
+
+ private void readHeader(final DataInput header, final Archive archive) throws IOException {
+ debug("Header");
+
+ int nid = header.readUnsignedByte();
+
+ if (nid == NID.kArchiveProperties) {
+ readArchiveProperties(header);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kAdditionalStreamsInfo) {
+ throw new IOException("Additional streams unsupported");
+ //nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kMainStreamsInfo) {
+ readStreamsInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kFilesInfo) {
+ readFilesInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated header");
+ }
+ }
+
+ private void readArchiveProperties(final DataInput input) throws IOException {
+ // FIXME: the reference implementation just throws them away?
+ debug("ArchiveProperties");
+
+ int nid = input.readUnsignedByte();
+ while (nid != NID.kEnd) {
+ final long propertySize = readUint64(input);
+ final byte[] property = new byte[(int)propertySize];
+ input.readFully(property);
+ nid = input.readUnsignedByte();
+ }
+ }
+
+ private void readEncodedHeader(final DataInputStream header, final Archive archive) throws IOException {
+ debug("EncodedHeader");
+
+ readStreamsInfo(header, archive);
+
+ // FIXME: and decompress it etc.
+
+ throw new IOException("LZMA compression unsupported, so files with compressed header cannot be read");
+ // FIXME: this extracts the header to an LZMA file which can then be
+ // manually decompressed.
+// long offset = SIGNATURE_HEADER_SIZE + archive.packPos;
+// file.seek(offset);
+// long unpackSize = archive.folders[0].getUnpackSize();
+// byte[] packed = new byte[(int)archive.packSizes[0]];
+// file.readFully(packed);
+//
+// FileOutputStream fos = new FileOutputStream(new File("/tmp/encodedHeader.7z"));
+// fos.write(archive.folders[0].coders[0].properties);
+// // size - assuming < 256
+// fos.write((int)(unpackSize & 0xff));
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(0);
+// fos.write(packed);
+// fos.close();
+ }
+
+ private void readStreamsInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("StreamsInfo");
+
+ int nid = header.readUnsignedByte();
+
+ if (nid == NID.kPackInfo) {
+ readPackInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kUnpackInfo) {
+ readUnpackInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kSubStreamsInfo) {
+ readSubStreamsInfo(header, archive);
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated StreamsInfo");
+ }
+ }
+
+ private void readPackInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("PackInfo");
+
+ archive.packPos = readUint64(header);
+ final long numPackStreams = readUint64(header);
+ debug(" " + numPackStreams + " pack streams");
+
+ int nid = header.readUnsignedByte();
+ if (nid == NID.kSize) {
+ archive.packSizes = new long[(int)numPackStreams];
+ for (int i = 0; i < archive.packSizes.length; i++) {
+ archive.packSizes[i] = readUint64(header);
+ debug(" pack size %d is %d\n", i, archive.packSizes[i]);
+ }
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid == NID.kCRC) {
+ archive.packCrcsDefined = readAllOrBits(header, (int)numPackStreams);
+ archive.packCrcs = new int[(int)numPackStreams];
+ for (int i = 0; i < (int)numPackStreams; i++) {
+ if (archive.packCrcsDefined.get(i)) {
+ archive.packCrcs[i] = Integer.reverseBytes(header.readInt());
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated PackInfo (" + nid + ")");
+ }
+ }
+
+ private void readUnpackInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("UnpackInfo");
+
+ int nid = header.readUnsignedByte();
+ if (nid != NID.kFolder) {
+ throw new IOException("Expected kFolder, got " + nid);
+ }
+ final long numFolders = readUint64(header);
+ debug(" " + numFolders + " folders");
+ final Folder[] folders = new Folder[(int)numFolders];
+ archive.folders = folders;
+ final int external = header.readUnsignedByte();
+ if (external != 0) {
+ throw new IOException("External unsupported");
+ } else {
+ for (int i = 0; i < (int)numFolders; i++) {
+ folders[i] = readFolder(header);
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ if (nid != NID.kCodersUnpackSize) {
+ throw new IOException("Expected kCodersUnpackSize, got " + nid);
+ }
+ for (final Folder folder : folders) {
+ folder.unpackSizes = new long[(int)folder.totalOutputStreams];
+ for (int i = 0; i < folder.totalOutputStreams; i++) {
+ folder.unpackSizes[i] = readUint64(header);
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ if (nid == NID.kCRC) {
+ final BitSet crcsDefined = readAllOrBits(header, (int)numFolders);
+ for (int i = 0; i < (int)numFolders; i++) {
+ if (crcsDefined.get(i)) {
+ folders[i].hasCrc = true;
+ folders[i].crc = Integer.reverseBytes(header.readInt());
+ } else {
+ folders[i].hasCrc = false;
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated UnpackInfo");
+ }
+ }
+
+ private void readSubStreamsInfo(final DataInput header, final Archive archive) throws IOException {
+ debug("SubStreamsInfo");
+
+ for (final Folder folder : archive.folders) {
+ folder.numUnpackSubStreams = 1;
+ }
+ int totalUnpackStreams = archive.folders.length;
+
+ int nid = header.readUnsignedByte();
+ if (nid == NID.kNumUnpackStream) {
+ totalUnpackStreams = 0;
+ for (final Folder folder : archive.folders) {
+ final long numStreams = readUint64(header);
+ folder.numUnpackSubStreams = (int)numStreams;
+ totalUnpackStreams += numStreams;
+ }
+ nid = header.readUnsignedByte();
+ }
+
+ final SubStreamsInfo subStreamsInfo = new SubStreamsInfo();
+ subStreamsInfo.unpackSizes = new long[(int)totalUnpackStreams];
+ subStreamsInfo.hasCrc = new BitSet((int)totalUnpackStreams);
+ subStreamsInfo.crcs = new int[(int)totalUnpackStreams];
+
+ int nextUnpackStream = 0;
+ for (final Folder folder : archive.folders) {
+ if (folder.numUnpackSubStreams == 0) {
+ continue;
+ }
+ long sum = 0;
+ if (nid == NID.kSize) {
+ for (int i = 0; i < (folder.numUnpackSubStreams - 1); i++) {
+ final long size = readUint64(header);
+ subStreamsInfo.unpackSizes[nextUnpackStream++] = size;
+ sum += size;
+ }
+ }
+ subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum;
+ }
+ if (nid == NID.kSize) {
+ nid = header.readUnsignedByte();
+ }
+
+ int numDigests = 0;
+ for (final Folder folder : archive.folders) {
+ if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) {
+ numDigests += folder.numUnpackSubStreams;
+ }
+ }
+
+ if (nid == NID.kCRC) {
+ final BitSet hasMissingCrc = readAllOrBits(header, (int)numDigests);
+ final int[] missingCrcs = new int[(int)numDigests];
+ for (int i = 0; i < (int)numDigests; i++) {
+ if (hasMissingCrc.get(i)) {
+ missingCrcs[i] = Integer.reverseBytes(header.readInt());
+ }
+ }
+ int nextCrc = 0;
+ int nextMissingCrc = 0;
+ for (final Folder folder: archive.folders) {
+ if (folder.numUnpackSubStreams == 1 && folder.hasCrc) {
+ subStreamsInfo.hasCrc.set(nextCrc, true);
+ subStreamsInfo.crcs[nextCrc] = folder.crc;
+ ++nextCrc;
+ } else {
+ for (int i = 0; i < folder.numUnpackSubStreams; i++) {
+ subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc));
+ subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc];
+ ++nextCrc;
+ ++nextMissingCrc;
+ }
+ }
+ }
+
+ nid = header.readUnsignedByte();
+ }
+
+ if (nid != NID.kEnd) {
+ throw new IOException("Badly terminated SubStreamsInfo");
+ }
+
+ archive.subStreamsInfo = subStreamsInfo;
+ }
+
+ private Folder readFolder(final DataInput header) throws IOException {
+ final Folder folder = new Folder();
+
+ final long numCoders = readUint64(header);
+ final Coder[] coders = new Coder[(int)numCoders];
+ long totalInStreams = 0;
+ long totalOutStreams = 0;
+ for (int i = 0; i < coders.length; i++) {
+ coders[i] = new Coder();
+ int bits = header.readUnsignedByte();
+ final int idSize = bits & 0xf;
+ final boolean isSimple = ((bits & 0x10) == 0);
+ final boolean hasAttributes = ((bits & 0x20) != 0);
+ final boolean moreAlternativeMethods = ((bits & 0x80) != 0);
+
+ coders[i].decompressionMethodId = new byte[idSize];
+ header.readFully(coders[i].decompressionMethodId);
+ if (isSimple) {
+ coders[i].numInStreams = 1;
+ coders[i].numOutStreams = 1;
+ } else {
+ coders[i].numInStreams = readUint64(header);
+ coders[i].numOutStreams = readUint64(header);
+ }
+ totalInStreams += coders[i].numInStreams;
+ totalOutStreams += coders[i].numOutStreams;
+ if (hasAttributes) {
+ final long propertiesSize = readUint64(header);
+ coders[i].properties = new byte[(int)propertiesSize];
+ header.readFully(coders[i].properties);
+ }
+ if (DEBUG) {
+ final StringBuilder methodStr = new StringBuilder();
+ for (final byte b : coders[i].decompressionMethodId) {
+ methodStr.append(String.format("%02X", 0xff & b));
+ }
+ debug(" coder entry %d numInStreams=%d, numOutStreams=%d, method=%s, properties=%s\n", i,
+ coders[i].numInStreams, coders[i].numOutStreams, methodStr.toString(),
+ Arrays.toString(coders[i].properties));
+ }
+ // would need to keep looping as above:
+ while (moreAlternativeMethods) {
+ throw new IOException("Alternative methods are unsupported, please report. " +
+ "The reference implementation doesn't support them either.");
+ }
+ }
+ folder.coders = coders;
+ folder.totalInputStreams = totalInStreams;
+ folder.totalOutputStreams = totalOutStreams;
+
+ if (totalOutStreams == 0) {
+ throw new IOException("Total output streams can't be 0");
+ }
+ final long numBindPairs = totalOutStreams - 1;
+ final BindPair[] bindPairs = new BindPair[(int)numBindPairs];
+ for (int i = 0; i < bindPairs.length; i++) {
+ bindPairs[i] = new BindPair();
+ bindPairs[i].inIndex = readUint64(header);
+ bindPairs[i].outIndex = readUint64(header);
+ debug(" bind pair in=%d out=%d\n", bindPairs[i].inIndex, bindPairs[i].outIndex);
+ }
+ folder.bindPairs = bindPairs;
+
+ if (totalInStreams < numBindPairs) {
+ throw new IOException("Total input streams can't be less than the number of bind pairs");
+ }
+ final long numPackedStreams = totalInStreams - numBindPairs;
+ final long packedStreams[] = new long[(int)numPackedStreams];
+ if (numPackedStreams == 1) {
+ int i;
+ for (i = 0; i < (int)totalInStreams; i++) {
+ if (folder.findBindPairForInStream(i) < 0) {
+ break;
+ }
+ }
+ if (i == (int)totalInStreams) {
+ throw new IOException("Couldn't find stream's bind pair index");
+ }
+ packedStreams[0] = i;
+ } else {
+ for (int i = 0; i < (int)numPackedStreams; i++) {
+ packedStreams[i] = readUint64(header);
+ }
+ }
+ folder.packedStreams = packedStreams;
+
+ return folder;
+ }
+
+ private BitSet readAllOrBits(final DataInput header, final int size) throws IOException {
+ final int areAllDefined = header.readUnsignedByte();
+ final BitSet bits;
+ if (areAllDefined != 0) {
+ bits = new BitSet(size);
+ for (int i = 0; i < size; i++) {
+ bits.set(i, true);
+ }
+ } else {
+ bits = readBits(header, size);
+ }
+ return bits;
+ }
+
+ private BitSet readBits(final DataInput header, final int size) throws IOException {
+ final BitSet bits = new BitSet(size);
+ int mask = 0;
+ int cache = 0;
+ for (int i = 0; i < size; i++) {
+ if (mask == 0) {
+ mask = 0x80;
+ cache = header.readUnsignedByte();
+ }
+ bits.set(i, (cache & mask) != 0);
+ mask >>>= 1;
+ }
+ return bits;
+ }
+
/**
 * Reads the FilesInfo section: per-file metadata (names, empty-stream and
 * empty-file flags, timestamps, Windows attributes) for every entry, then
 * fills in sizes/CRCs from the previously-read sub-streams info and
 * derives the stream map.
 *
 * @param header  the header input, positioned at the FilesInfo record
 * @param archive the archive being populated; archive.subStreamsInfo is
 *                read here and is assumed to be non-null — TODO confirm
 *                headers always carry it when there are non-empty files
 * @throws IOException on malformed or unsupported property records
 */
private void readFilesInfo(final DataInput header, final Archive archive) throws IOException {
    debug("FilesInfo");

    final long numFiles = readUint64(header);
    final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int)numFiles];
    for (int i = 0; i < files.length; i++) {
        files[i] = new SevenZArchiveEntry();
    }
    // Entries without a packed stream (directories / empty files), which of
    // those are empty files rather than directories, and which are "anti"
    // items. The latter two vectors are indexed over the empty-stream
    // entries only, hence the cardinality() sizing below.
    BitSet isEmptyStream = null;
    BitSet isEmptyFile = null;
    BitSet isAnti = null;
    // Property records follow until a zero type byte terminates the list.
    while (true) {
        final int propertyType = header.readUnsignedByte();
        if (propertyType == 0) {
            break;
        }
        // Size in bytes of this property record; the parsers below consume
        // exactly that many bytes themselves, so it is only checked, never
        // used for skipping (see the FIXME in the default case).
        long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                debug("  kEmptyStream");
                isEmptyStream = readBits(header, files.length);
                break;
            }
            case NID.kEmptyFile: {
                // NOTE(review): assumes kEmptyStream has already been seen;
                // a header with kEmptyFile first would NPE here.
                debug("  kEmptyFile");
                isEmptyFile = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kAnti: {
                debug("  kAnti");
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                debug("  kNames");
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Not implemented");
                } else {
                    // Names are NUL-terminated UTF-16LE strings packed back
                    // to back; total length (size minus the external byte)
                    // must therefore be even.
                    if (((size - 1) & 1) != 0) {
                        throw new IOException("File names length invalid");
                    }
                    final byte[] names = new byte[(int)(size - 1)];
                    header.readFully(names);
                    int nextFile = 0;
                    int nextName = 0;
                    for (int i = 0; i < names.length; i += 2) {
                        // A 16-bit NUL terminates the current name.
                        if (names[i] == 0 && names[i+1] == 0) {
                            files[nextFile++].setName(new String(names, nextName, i-nextName, "UTF-16LE"));
                            nextName = i + 2;
                        }
                    }
                    // Every byte must belong to a name and every file must
                    // have received one.
                    if (nextName != names.length || nextFile != files.length) {
                        throw new IOException("Error parsing file names");
                    }
                }
                break;
            }
            case NID.kCTime: {
                debug("  kCreationTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasCreationDate(timesDefined.get(i));
                        if (files[i].getHasCreationDate()) {
                            // Stored little-endian; readLong is big-endian,
                            // hence the byte reversal.
                            files[i].setCreationDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kATime: {
                debug("  kLastAccessTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasAcessDate(timesDefined.get(i));
                        if (files[i].getHasAcessDate()) {
                            files[i].setAccessDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kMTime: {
                debug("  kLastWriteTime");
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasLastModifiedDate(timesDefined.get(i));
                        if (files[i].getHasLastModifiedDate()) {
                            files[i].setLastModifiedDate(Long.reverseBytes(header.readLong()));
                        }
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                debug("  kWinAttributes");
                final BitSet attributesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                } else {
                    for (int i = 0; i < files.length; i++) {
                        files[i].setHasWindowsAttributes(attributesDefined.get(i));
                        if (files[i].getHasWindowsAttributes()) {
                            // Little-endian 32-bit value, see kCTime above.
                            files[i].setWindowsAttributes(Integer.reverseBytes(header.readInt()));
                        }
                    }
                }
                break;
            }
            case NID.kStartPos: {
                debug("  kStartPos");
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                debug("  kDummy");
                throw new IOException("kDummy is unsupported, please report");
            }

            default: {
                throw new IOException("Unknown property " + propertyType);
                // FIXME: Should actually:
                //header.skipBytes((int)size);
            }
        }
    }
    // Second pass: combine the bit vectors with the sub-streams info.
    // Non-empty files consume sizes/CRCs in order; empty-stream entries
    // are classified as directories, empty files or anti items.
    int nonEmptyFileCounter = 0;
    int emptyFileCounter = 0;
    for (int i = 0; i < files.length; i++) {
        files[i].setHasStream((isEmptyStream == null) ? true : !isEmptyStream.get(i));
        if (files[i].hasStream()) {
            files[i].setDirectory(false);
            files[i].setAntiItem(false);
            files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
            files[i].setCrc(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
            files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
            ++nonEmptyFileCounter;
        } else {
            // No kEmptyFile vector means every empty-stream entry is a
            // directory.
            files[i].setDirectory((isEmptyFile == null) ? true : !isEmptyFile.get(emptyFileCounter));
            files[i].setAntiItem((isAnti == null) ? false : isAnti.get(emptyFileCounter));
            files[i].setHasCrc(false);
            files[i].setSize(0);
            ++emptyFileCounter;
        }
    }
    archive.files = files;
    calculateStreamMap(archive);
}
+
/**
 * Derives the cross-reference tables between folders, pack streams and
 * files, storing them in {@code archive.streamMap}.
 *
 * Must run after folders, pack sizes and files have all been read.
 *
 * @param archive the archive whose folders/packSizes/files are populated
 * @throws IOException if files with streams remain but no folder with
 *         unpacked sub-streams is left to assign them to
 */
private void calculateStreamMap(final Archive archive) throws IOException {
    final StreamMap streamMap = new StreamMap();

    // Pack streams are stored folder by folder; record where each
    // folder's run of pack streams begins.
    int nextFolderPackStreamIndex = 0;
    streamMap.folderFirstPackStreamIndex = new int[archive.folders.length];
    for (int i = 0; i < archive.folders.length; i++) {
        streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
        nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
    }

    // Prefix sums of the pack sizes give each pack stream's byte offset
    // relative to the first pack stream.
    long nextPackStreamOffset = 0;
    streamMap.packStreamOffsets = new long[archive.packSizes.length];
    for (int i = 0; i < archive.packSizes.length; i++) {
        streamMap.packStreamOffsets[i] = nextPackStreamOffset;
        nextPackStreamOffset += archive.packSizes[i];
    }

    // Walk the files in order, handing each file-with-stream to the next
    // folder that still has unpacked sub-streams available.
    streamMap.folderFirstFileIndex = new int[archive.folders.length];
    streamMap.fileFolderIndex = new int[archive.files.length];
    int nextFolderIndex = 0;
    int nextFolderUnpackStreamIndex = 0;
    for (int i = 0; i < archive.files.length; i++) {
        // Stream-less files before the first assigned folder belong to no
        // folder at all.
        if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) {
            streamMap.fileFolderIndex[i] = -1;
            continue;
        }
        if (nextFolderUnpackStreamIndex == 0) {
            // Starting a new folder: skip folders with no sub-streams,
            // recording this file as each one's first file on the way.
            for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
                streamMap.folderFirstFileIndex[nextFolderIndex] = i;
                if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
                    break;
                }
            }
            if (nextFolderIndex >= archive.folders.length) {
                throw new IOException("Too few folders in archive");
            }
        }
        streamMap.fileFolderIndex[i] = nextFolderIndex;
        if (!archive.files[i].hasStream()) {
            continue;
        }
        // Consume one sub-stream; when the folder is exhausted, move on.
        ++nextFolderUnpackStreamIndex;
        if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
            ++nextFolderIndex;
            nextFolderUnpackStreamIndex = 0;
        }
    }

    archive.streamMap = streamMap;
}
+
/**
 * Sets up {@code currentEntryInputStream} for the current entry
 * ({@code currentEntryIndex}).
 *
 * Entries without a stream get an empty stream. When the entry lives in
 * the same folder as the previous one, the existing folder decoder stream
 * is reused (after draining the previous entry's remaining bytes);
 * otherwise a fresh decoder stack is built for the entry's folder. The
 * per-entry stream is bounded to the entry size and, when a CRC is
 * present, wrapped in a CRC-verifying stream.
 *
 * @throws IOException on decoder setup or drain failure
 */
private void buildDecodingStream() throws IOException {
    final int folderIndex = archive.streamMap.fileFolderIndex[currentEntryIndex];
    if (folderIndex < 0) {
        // Directory / empty file: no packed data to decode.
        currentEntryInputStream = new BoundedInputStream(
                new ByteArrayInputStream(new byte[0]), 0);
        return;
    }
    if (currentFolderIndex == folderIndex) {
        // need to advance the folder input stream past the current file
        drainPreviousEntry();
    } else {
        currentFolderIndex = folderIndex;
        if (currentFolderInputStream != null) {
            currentFolderInputStream.close();
            currentFolderInputStream = null;
        }

        final Folder folder = archive.folders[folderIndex];
        final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
        // Absolute file position of the folder's packed data: signature
        // header, then packPos, then the pack stream's relative offset.
        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
                archive.streamMap.packStreamOffsets[firstPackStreamIndex];
        currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex);
    }
    final SevenZArchiveEntry file = archive.files[currentEntryIndex];
    final InputStream fileStream = new BoundedInputStream(
            currentFolderInputStream, file.getSize());
    if (file.getHasCrc()) {
        currentEntryInputStream = new CRC32VerifyingInputStream(
                fileStream, file.getSize(), file.getCrc());
    } else {
        currentEntryInputStream = fileStream;
    }

}
+
+ private void drainPreviousEntry() throws IOException {
+ if (currentEntryInputStream != null) {
+ final byte[] buffer = new byte[64*1024];
+ while (currentEntryInputStream.read(buffer) >= 0) { // NOPMD
+ }
+ currentEntryInputStream.close();
+ currentEntryInputStream = null;
+ }
+ }
+
/**
 * Builds the decoder input stream for one folder's packed data.
 *
 * Only single-coder folders are supported; the coder's method id selects
 * the decompressor (currently Copy and LZMA2). The result is wrapped in a
 * CRC check when the folder carries a CRC.
 *
 * @param folder               the folder to decode
 * @param folderOffset         absolute file offset of the folder's packed data
 * @param firstPackStreamIndex index of the folder's first pack stream
 *                             (used to look up its packed size)
 * @return the decoded (unpacked) stream for the folder
 * @throws IOException for multi-coder folders or unsupported methods
 */
private InputStream buildDecoderStack(final Folder folder, final long folderOffset,
        final int firstPackStreamIndex) throws IOException {
    InputStream inputStreamStack = null;
    for (int i = 0; i < folder.coders.length; i++) {
        // Chained codecs are not implemented yet; bail out on the second.
        if (i > 0) {
            throw new IOException("Unsupported multi-codec stream");
        }
        // Position the shared RandomAccessFile at the packed data.
        file.seek(folderOffset);
        if (folder.coders[i].decompressionMethodId.length == 1 &&
                folder.coders[i].decompressionMethodId[0] == 0) {
            // 00 - Copy
            inputStreamStack = new BoundedRandomAccessFileInputStream(
                    archive.packSizes[firstPackStreamIndex]);
            // FIXME: LZMA is the default coder yet ironically we don't have it.
//        } else if (folder.coders[i].decompressionMethodId.length == 3 &&
//                folder.coders[i].decompressionMethodId[0] == 3 &&
//                folder.coders[i].decompressionMethodId[1] == 1 &&
//                folder.coders[i].decompressionMethodId[2] == 1) {
//            // 03.. - 7z
//            //   01 - LZMA
//            //     01 - Version
        } else if (folder.coders[i].decompressionMethodId.length == 1 &&
                folder.coders[i].decompressionMethodId[0] == 0x21) {
            // 21 - LZMA2
            // The single property byte encodes the dictionary size; only
            // the low 6 bits may be used, and 40 is the defined maximum.
            final int dictionarySizeBits = 0xff & folder.coders[i].properties[0];
            if ((dictionarySizeBits & (~0x3f)) != 0) {
                throw new IOException("Unsupported LZMA2 property bits");
            }
            if (dictionarySizeBits > 40) {
                throw new IOException("Dictionary larger than 4GiB maximum size");
            }
            final int dictionarySize;
            if (dictionarySizeBits == 40) {
                // Special case: full 4 GiB - 1 dictionary.
                dictionarySize = 0xFFFFffff;
            } else {
                // (2 | lowest bit) scaled by a power of two derived from the
                // remaining bits — the standard LZMA2 dictionary-size mapping.
                dictionarySize = (2 | (dictionarySizeBits & 0x1)) << (dictionarySizeBits / 2 + 11);
            }
            inputStreamStack = new LZMA2InputStream(
                    new BoundedRandomAccessFileInputStream(
                            archive.packSizes[firstPackStreamIndex]),
                    dictionarySize);
            // FIXME: gives corrupt output:
//        } else if (folder.coders[i].decompressionMethodId.length == 3 &&
//                folder.coders[i].decompressionMethodId[0] == 0x4 &&
//                folder.coders[i].decompressionMethodId[1] == 0x1 &&
//                folder.coders[i].decompressionMethodId[2] == 0x8) {
//            // 04.. - Misc
//            //   00 - Reserved
//            //   01 - Zip
//            //     00 - Copy (not used). Use {00} instead
//            //     01 - Shrink
//            //     06 - Implode
//            //     08 - Deflate
//            return new DeflaterInputStream(
//                    new BoundedRandomAccessFileInputStream(
//                            archive.packSizes[firstPackStreamIndex]),
//                    new Deflater(Deflater.DEFAULT_COMPRESSION, true));
        } else {
            throw new IOException("Unsupported compression method " +
                    Arrays.toString(folder.coders[i].decompressionMethodId));
        }
    }
    // Verify the whole folder's unpacked data when a folder-level CRC exists.
    if (folder.hasCrc) {
        return new CRC32VerifyingInputStream(inputStreamStack,
                folder.getUnpackSize(), folder.crc);
    } else {
        return inputStreamStack;
    }
}
+
/**
 * Reads one byte of data from the current entry.
 *
 * NOTE(review): throws NullPointerException if no entry is current yet —
 * callers must obtain an entry first; confirm against getNextEntry().
 *
 * @return the byte read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read() throws IOException {
    return currentEntryInputStream.read();
}
+
/**
 * Reads data into the given array from the current entry.
 *
 * @param b the buffer to fill
 * @return the number of bytes read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read(byte[] b) throws IOException {
    // Delegates to the three-argument overload.
    return read(b, 0, b.length);
}
+
/**
 * Reads up to {@code len} bytes of the current entry into {@code b}
 * starting at {@code off}.
 *
 * @param b   the buffer to fill
 * @param off start offset within the buffer
 * @param len maximum number of bytes to read
 * @return the number of bytes read, or -1 at the end of the entry's data
 * @throws IOException if an I/O error occurs
 */
public int read(byte[] b, int off, int len) throws IOException {
    return currentEntryInputStream.read(b, off, len);
}
+
+ private static long readUint64(final DataInput in) throws IOException {
+ int firstByte = in.readUnsignedByte();
+ int mask = 0x80;
+ int value = 0;
+ for (int i = 0; i < 8; i++) {
+ if ((firstByte & mask) == 0) {
+ return value | ((firstByte & (mask - 1)) << (8 * i));
+ }
+ int nextByte = in.readUnsignedByte();
+ value |= (nextByte << (8 * i));
+ mask >>>= 1;
+ }
+ return value;
+ }
+
+ private class BoundedRandomAccessFileInputStream extends InputStream {
+ private long bytesRemaining;
+
+ public BoundedRandomAccessFileInputStream(final long size) {
+ bytesRemaining = size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (bytesRemaining > 0) {
+ --bytesRemaining;
+ return file.read();
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (bytesRemaining == 0) {
+ return -1;
+ }
+ int bytesToRead = len;
+ if (bytesToRead > bytesRemaining) {
+ bytesToRead = (int) bytesRemaining;
+ }
+ final int bytesRead = file.read(b, off, bytesToRead);
+ if (bytesRead >= 0) {
+ bytesRemaining -= bytesRead;
+ }
+ return bytesRead;
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+
+ private static class BoundedInputStream extends InputStream {
+ private InputStream is;
+ private long bytesRemaining;
+
+ public BoundedInputStream(final InputStream is, final long size) {
+ this.is = is;
+ bytesRemaining = size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (bytesRemaining > 0) {
+ --bytesRemaining;
+ return is.read();
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ if (bytesRemaining == 0) {
+ return -1;
+ }
+ int bytesToRead = len;
+ if (bytesToRead > bytesRemaining) {
+ bytesToRead = (int) bytesRemaining;
+ }
+ final int bytesRead = is.read(b, off, bytesToRead);
+ if (bytesRead >= 0) {
+ bytesRemaining -= bytesRead;
+ }
+ return bytesRead;
+ }
+
+ @Override
+ public void close() {
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
new file mode 100644
index 0000000..4faa083
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
/**
 * The start header of a 7z archive: locates and checksums the main
 * ("next") header.
 */
class StartHeader {
    /** Offset of the next header (relative base not visible here —
     *  presumably past the signature header; confirm in SevenZFile). */
    long nextHeaderOffset;
    /** Size in bytes of the next header. */
    long nextHeaderSize;
    /** CRC32 of the next header's bytes. */
    int nextHeaderCrc;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java
new file mode 100644
index 0000000..4638acc
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+/// Map between folders, files and streams.
/** Map between folders, files and streams. */
class StreamMap {
    /** The first Archive.packStream index of each folder. */
    int[] folderFirstPackStreamIndex;
    /** Offset to beginning of this pack stream's data, relative to the beginning of the first pack stream. */
    long[] packStreamOffsets;
    /** Index of first file for each folder. */
    int[] folderFirstFileIndex;
    /** Index of folder for each file. */
    int[] fileFolderIndex;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java
new file mode 100644
index 0000000..04fb814
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import java.util.BitSet;
+
+/// Properties for non-empty files.
/** Properties for non-empty files. */
class SubStreamsInfo {
    /** Unpacked size of each unpacked stream. */
    long[] unpackSizes;
    /** Whether CRC is present for each unpacked stream. */
    BitSet hasCrc;
    /** CRCs of unpacked streams, if present. */
    int[] crcs;
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html
new file mode 100644
index 0000000..6b85bcf
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html
@@ -0,0 +1,24 @@
+<html>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+ <body>
+ <p>Provides classes for reading archives using
+ the 7z format.</p>
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java
new file mode 100644
index 0000000..6377fb6
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.CRC32;
+
/**
 * A stream that verifies the CRC32 of all bytes read, once the expected
 * number of bytes have been consumed; an {@link IOException} is thrown if
 * the checksum does not match.
 *
 * Not thread-safe.
 */
public class CRC32VerifyingInputStream extends InputStream {
    private final InputStream is;
    private long bytesRemaining;
    private final int expectedCrc32;
    private final CRC32 crc32 = new CRC32();

    /**
     * @param is            the stream to read and verify
     * @param size          the number of bytes expected
     * @param expectedCrc32 the CRC32 the first {@code size} bytes must hash to
     */
    public CRC32VerifyingInputStream(final InputStream is, final long size, final int expectedCrc32) {
        this.is = is;
        this.expectedCrc32 = expectedCrc32;
        this.bytesRemaining = size;
    }

    @Override
    public int read() throws IOException {
        if (bytesRemaining <= 0) {
            return -1;
        }
        final int ret = is.read();
        if (ret >= 0) {
            crc32.update(ret);
            --bytesRemaining;
        }
        if (bytesRemaining == 0 && expectedCrc32 != (int) crc32.getValue()) {
            throw new IOException("CRC32 verification failed");
        }
        return ret;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // Fix: previously this overload ignored bytesRemaining entirely, so
        // reads past the expected size were hashed too and could then make
        // the CRC check fail on correct data. Stop at the bound, matching
        // the single-byte read() above.
        if (bytesRemaining <= 0) {
            return -1;
        }
        int bytesToRead = len;
        if (bytesToRead > bytesRemaining) {
            bytesToRead = (int) bytesRemaining;
        }
        final int ret = is.read(b, off, bytesToRead);
        if (ret >= 0) {
            crc32.update(b, off, ret);
            bytesRemaining -= ret;
        }
        if (bytesRemaining <= 0 && expectedCrc32 != (int) crc32.getValue()) {
            throw new IOException("CRC32 verification failed");
        }
        return ret;
    }

    @Override
    public long skip(long n) throws IOException {
        // Can't delegate skipping: every byte must pass through the digest.
        // Fix: honor n (the old version skipped at most one byte per call,
        // which was contract-legal but needlessly slow).
        long skipped = 0;
        while (skipped < n && read() >= 0) {
            ++skipped;
        }
        return skipped;
    }

    @Override
    public void close() throws IOException {
        is.close();
    }
}
diff --git a/src/site/xdoc/examples.xml b/src/site/xdoc/examples.xml
index a48cb31..1ab9da9 100644
--- a/src/site/xdoc/examples.xml
+++ b/src/site/xdoc/examples.xml
@@ -33,7 +33,7 @@
<p>The compressor formats supported are gzip, bzip2, xz and
Pack200, the archiver formats are ar, cpio, dump (read-only),
- tar and zip. Pack200 is a special case as it can only
+ tar, zip and 7z. Pack200 is a special case as it can only
compress JAR files.</p>
</subsection>
@@ -424,6 +424,21 @@
]]></source>
</subsection>
+ <subsection name="7z">
+
+ <p>Uncompressing a given 7z archive (you would
+ certainly add exception handling and make sure all streams
+ get closed properly):</p>
+<source><![CDATA[
+SevenZFile sevenZFile = new SevenZFile(new File("archive.7z"));
+SevenZArchiveEntry entry = sevenZFile.getNextEntry();
+byte[] content = new byte[(int) entry.getSize()];
+int offset = 0;
+while (offset < content.length) {
+    offset += sevenZFile.read(content, offset, content.length - offset);
+}
+]]></source>
+ </subsection>
+
</section>
</body>
</document>
diff --git a/src/site/xdoc/index.xml b/src/site/xdoc/index.xml
index 10367ae..d2a12f7 100644
--- a/src/site/xdoc/index.xml
+++ b/src/site/xdoc/index.xml
@@ -26,7 +26,8 @@
<section name="Apache Commons Compress™">
<p>
The Apache Commons Compress library defines an API for
- working with ar, cpio, Unix dump, tar, zip, gzip, XZ, Pack200 and bzip2 files.
+ working with ar, cpio, Unix dump, tar, zip, gzip, XZ, Pack200,
+ bzip2 and 7z files.
</p>
<p>
The code in this component has many origins:
@@ -71,7 +72,7 @@
domain <a href="http://tukaani.org/xz/java.html">XZ for
Java</a> library.</p>
- <p>The ar, cpio, dump, tar and zip formats are supported as
+ <p>The ar, cpio, dump, tar, 7z and zip formats are supported as
archivers where the <a href="zip.html">zip</a>
implementation provides capabilities that go beyond the
features found in java.util.zip. As of Commons Compress
diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
new file mode 100644
index 0000000..7ad1e60
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.sevenz;
+
+import org.apache.commons.compress.AbstractTestCase;
+
+public class SevenZFileTest extends AbstractTestCase {
+ public void testHelloWorldHeaderCompressionOffCopy() throws Exception {
+ checkHelloWorld("7z-hello-mhc-off-copy.7z");
+ }
+
+ public void testHelloWorldHeaderCompressionOffLZMA2() throws Exception {
+ checkHelloWorld("7z-hello-mhc-off-lzma2.7z");
+ }
+
+ private void checkHelloWorld(final String filename) throws Exception {
+ SevenZFile sevenZFile = new SevenZFile(getFile(filename));
+ try {
+ SevenZArchiveEntry entry = sevenZFile.getNextEntry();
+ assertEquals("Hello world.txt", entry.getName());
+ byte[] contents = new byte[(int)entry.getSize()];
+ int off = 0;
+ while ((off < contents.length)) {
+ int bytesRead = sevenZFile.read(contents, off, contents.length - off);
+ assert(bytesRead >= 0);
+ off += bytesRead;
+ }
+ assertEquals("Hello, world!\n", new String(contents, "UTF-8"));
+ assertNull(sevenZFile.getNextEntry());
+ } finally {
+ sevenZFile.close();
+ }
+ }
+}
diff --git a/src/test/resources/7z-hello-mhc-off-copy.7z b/src/test/resources/7z-hello-mhc-off-copy.7z
new file mode 100644
index 0000000..2cc91da
--- /dev/null
+++ b/src/test/resources/7z-hello-mhc-off-copy.7z
Binary files differ
diff --git a/src/test/resources/7z-hello-mhc-off-lzma2.7z b/src/test/resources/7z-hello-mhc-off-lzma2.7z
new file mode 100644
index 0000000..0fbcd25
--- /dev/null
+++ b/src/test/resources/7z-hello-mhc-off-lzma2.7z
Binary files differ