/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.compressors.snappy;

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;

/**
* CompressorInputStream for the raw Snappy format.
*
* <p>This implementation uses an internal buffer in order to handle
* the back-references that are at the heart of the LZ77 algorithm.
* The size of the buffer must be at least as big as the biggest
* offset used in the compressed stream. The current version of the
* Snappy algorithm as defined by Google works on 32k blocks and
 * doesn't contain offsets bigger than 32k, which is the default block
 * size used by this class.</p>
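 *
 * <p>A minimal usage sketch (assuming <code>in</code> is an InputStream of
 * raw Snappy data and <code>out</code> is an OutputStream, both hypothetical):</p>
 *
 * <pre>
 * try (SnappyCompressorInputStream snappyIn = new SnappyCompressorInputStream(in)) {
 *     byte[] buffer = new byte[8192];
 *     int n;
 *     while ((n = snappyIn.read(buffer)) != -1) {
 *         out.write(buffer, 0, n);
 *     }
 * }
 * </pre>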
*
* @see <a href="http://code.google.com/p/snappy/source/browse/trunk/format_description.txt">Snappy compressed format description</a>
* @since 1.7
*/
public class SnappyCompressorInputStream extends CompressorInputStream {
    /** Mask used to determine the type of "tag" being processed */
private static final int TAG_MASK = 0x03;
/** Default block size */
public static final int DEFAULT_BLOCK_SIZE = 32768;
/** Buffer to write decompressed bytes to for back-references */
private final byte[] decompressBuf;
    /** Index one past the last byte written to the buffer */
private int writeIndex;
/** Index of the next byte to be read. */
private int readIndex;
/** The actual block size specified */
private final int blockSize;
/** The underlying stream to read compressed data from */
private final InputStream in;
/** The size of the uncompressed data */
private final int size;
/** Number of uncompressed bytes still to be read. */
private int uncompressedBytesRemaining;
// used in no-arg read method
private final byte[] oneByte = new byte[1];
private boolean endReached = false;
/**
* Constructor using the default buffer size of 32k.
*
* @param is
* An InputStream to read compressed data from
*
     * @throws IOException if reading fails
*/
public SnappyCompressorInputStream(final InputStream is) throws IOException {
this(is, DEFAULT_BLOCK_SIZE);
}
/**
* Constructor using a configurable buffer size.
*
* @param is
* An InputStream to read compressed data from
* @param blockSize
* The block size used in compression
*
     * @throws IOException if reading fails
*/
public SnappyCompressorInputStream(final InputStream is, final int blockSize)
throws IOException {
this.in = is;
this.blockSize = blockSize;
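        // Decompressed data accumulates in this buffer (three block lengths);
        // back-references are resolved against it and slideBuffer() drops the
        // oldest block once the read position has moved past it.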
this.decompressBuf = new byte[blockSize * 3];
this.writeIndex = readIndex = 0;
uncompressedBytesRemaining = size = (int) readSize();
}
/** {@inheritDoc} */
@Override
public int read() throws IOException {
return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
in.close();
}
/** {@inheritDoc} */
@Override
public int available() {
return writeIndex - readIndex;
}
/**
* {@inheritDoc}
*/
@Override
public int read(byte[] b, int off, int len) throws IOException {
if (endReached) {
return -1;
}
final int avail = available();
if (len > avail) {
fill(len - avail);
}
int readable = Math.min(len, available());
if (readable == 0 && len > 0) {
return -1;
}
System.arraycopy(decompressBuf, readIndex, b, off, readable);
readIndex += readable;
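        // Once the read position is more than one block into the buffer, drop
        // the oldest block to make room for further decompression.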
if (readIndex > blockSize) {
slideBuffer();
}
return readable;
}
/**
* Try to fill the buffer with enough bytes to satisfy the current
* read request.
*
* @param len the number of uncompressed bytes to read
*/
private void fill(int len) throws IOException {
if (uncompressedBytesRemaining == 0) {
endReached = true;
}
int readNow = Math.min(len, uncompressedBytesRemaining);
while (readNow > 0) {
final int b = readOneByte();
int length = 0;
long offset = 0;
switch (b & TAG_MASK) {
case 0x00:
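                /*
                 * Literals: the uncompressed bytes are stored directly in the
                 * stream, preceded by their length (see readLiteralLength).
                 */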
length = readLiteralLength(b);
if (expandLiteral(length)) {
return;
}
break;
case 0x01:
/*
* These elements can encode lengths between [4..11] bytes and
* offsets between [0..2047] bytes. (len-4) occupies three bits
* and is stored in bits [2..4] of the tag byte. The offset
* occupies 11 bits, of which the upper three are stored in the
* upper three bits ([5..7]) of the tag byte, and the lower
* eight are stored in a byte following the tag byte.
*/
length = 4 + ((b >> 2) & 0x07);
offset = (b & 0xE0) << 3;
offset |= readOneByte();
if (expandCopy(offset, length)) {
return;
}
break;
case 0x02:
/*
* These elements can encode lengths between [1..64] and offsets
* from [0..65535]. (len-1) occupies six bits and is stored in
* the upper six bits ([2..7]) of the tag byte. The offset is
* stored as a little-endian 16-bit integer in the two bytes
* following the tag byte.
*/
length = (b >> 2) + 1;
offset = readOneByte();
offset |= readOneByte() << 8;
if (expandCopy(offset, length)) {
return;
}
break;
case 0x03:
/*
* These are like the copies with 2-byte offsets (see previous
* subsection), except that the offset is stored as a 32-bit
* integer instead of a 16-bit integer (and thus will occupy
* four bytes).
*/
length = (b >> 2) + 1;
offset = readOneByte();
offset |= readOneByte() << 8;
offset |= readOneByte() << 16;
offset |= ((long) readOneByte()) << 24;
if (expandCopy(offset, length)) {
return;
}
break;
}
readNow -= length;
uncompressedBytesRemaining -= length;
}
}
/**
* Slide buffer.
*
* <p>Move all bytes of the buffer after the first block down to
* the beginning of the buffer.</p>
*/
private void slideBuffer() {
System.arraycopy(decompressBuf, blockSize, decompressBuf, 0,
blockSize * 2);
writeIndex -= blockSize;
readIndex -= blockSize;
}
    /*
     * For literals up to and including 60 bytes in length, the upper six bits
     * of the tag byte contain (len-1). The literal follows immediately
     * thereafter in the bytestream.
     *
     * For longer literals, the (len-1) value is stored after the tag byte,
     * little-endian. The upper six bits of the tag byte describe how many
     * bytes are used for the length; 60, 61, 62 or 63 for 1-4 bytes,
     * respectively. The literal itself follows after the length.
     */
private int readLiteralLength(int b) throws IOException {
int length;
switch (b >> 2) {
case 60:
length = readOneByte();
break;
case 61:
length = readOneByte();
length |= readOneByte() << 8;
break;
case 62:
length = readOneByte();
length |= readOneByte() << 8;
length |= readOneByte() << 16;
break;
case 63:
length = readOneByte();
length |= readOneByte() << 8;
length |= readOneByte() << 16;
length |= (((long) readOneByte()) << 24);
break;
default:
length = b >> 2;
break;
}
return length + 1;
}
/**
* Literals are uncompressed data stored directly in the byte stream.
*
* @param length
* The number of bytes to read from the underlying stream
*
     * @throws IOException
     *             If reading from the underlying stream fails, or if it ends
     *             before the requested number of bytes have been read
* @return True if the decompressed data should be flushed
*/
private boolean expandLiteral(final int length) throws IOException {
int bytesRead = IOUtils.readFully(in, decompressBuf, writeIndex, length);
count(bytesRead);
if (length != bytesRead) {
throw new IOException("Premature end of stream");
}
writeIndex += length;
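        // Signal a flush once two blocks' worth of data has been buffered, so
        // fill() stops decoding and read() can drain the buffer.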
return writeIndex >= 2 * this.blockSize;
}
/**
* Copies are references back into previous decompressed data, telling the
* decompressor to reuse data it has previously decoded. They encode two
* values: The offset, saying how many bytes back from the current position
* to read, and the length, how many bytes to copy. Offsets of zero can be
* encoded, but are not legal; similarly, it is possible to encode
* backreferences that would go past the end of the block (offset > current
* decompressed position), which is also nonsensical and thus not allowed.
*
* @param off
     *            The offset backward from the end of the expanded stream
* @param length
* The number of bytes to copy
*
* @throws IOException
     *             If the offset reaches back past the front of the
     *             decompression buffer
* @return True if the decompressed data should be flushed
*/
private boolean expandCopy(final long off, int length) throws IOException {
if (off > blockSize) {
throw new IOException("Offset is larger than block size");
}
int offset = (int) off;
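        // An offset of 1 is a run of the previous byte; a copy that does not
        // overlap its source is done with a single arraycopy; an overlapping
        // copy is expanded in offset-sized chunks plus a final partial chunk.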
if (offset == 1) {
byte lastChar = decompressBuf[writeIndex - 1];
for (int i = 0; i < length; i++) {
decompressBuf[writeIndex++] = lastChar;
}
} else if (length < offset) {
System.arraycopy(decompressBuf, writeIndex - offset,
decompressBuf, writeIndex, length);
writeIndex += length;
} else {
int fullRotations = length / offset;
int pad = length - (offset * fullRotations);
while (fullRotations-- != 0) {
System.arraycopy(decompressBuf, writeIndex - offset,
decompressBuf, writeIndex, offset);
writeIndex += offset;
}
if (pad > 0) {
System.arraycopy(decompressBuf, writeIndex - offset,
decompressBuf, writeIndex, pad);
writeIndex += pad;
}
}
return writeIndex >= 2 * this.blockSize;
}
/**
* This helper method reads the next byte of data from the input stream. The
* value byte is returned as an <code>int</code> in the range <code>0</code>
* to <code>255</code>. If no byte is available because the end of the
     * stream has been reached, an <code>IOException</code> is thrown.
*
* @return The next byte of data
* @throws IOException
     *             If EOF is reached or an error occurs while reading the stream
*/
private int readOneByte() throws IOException {
int b = in.read();
if (b == -1) {
throw new IOException("Premature end of stream");
}
count(1);
return b & 0xFF;
}
/**
* The stream starts with the uncompressed length (up to a maximum of 2^32 -
* 1), stored as a little-endian varint. Varints consist of a series of
* bytes, where the lower 7 bits are data and the upper bit is set iff there
* are more bytes to be read. In other words, an uncompressed length of 64
* would be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
* would be stored as 0xFE 0xFF 0x7F.
*
* @return The size of the uncompressed data
*
* @throws IOException
     *             If a byte could not be read
*/
private long readSize() throws IOException {
int index = 0;
long sz = 0;
int b = 0;
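        // Each varint byte contributes its low seven bits; a set high bit
        // means another byte follows.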
do {
b = readOneByte();
sz |= (b & 0x7f) << (index++ * 7);
} while (0 != (b & 0x80));
return sz;
}
/**
* Get the uncompressed size of the stream
*
* @return the uncompressed size
*/
public int getSize() {
return size;
}
}