Blame - src/main/java/org/apache/commons/compress/archivers/tar/TarInputStream.java - platform/external/apache-commons-compress

blob: bf05051a4fe961308be7e72b5400567d6839e2fe [file] [log] [blame]

Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	1	/*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	2	* Licensed to the Apache Software Foundation (ASF) under one or more
				3	* contributor license agreements. See the NOTICE file distributed with
				4	* this work for additional information regarding copyright ownership.
				5	* The ASF licenses this file to You under the Apache License, Version 2.0
				6	* (the "License"); you may not use this file except in compliance with
				7	* the License. You may obtain a copy of the License at
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	8	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	9	* http://www.apache.org/licenses/LICENSE-2.0
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	10	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	17	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	18
				19	/*
				20	* This package is based on the work done by Timothy Gerard Endres
				21	* (time@ice.com) to whom the Ant project is very grateful for his great code.
				22	*/
				23
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	24	package org.apache.commons.compress.archivers.tar;
				25
				26	import java.io.FilterInputStream;
				27	import java.io.IOException;
				28	import java.io.InputStream;
				29	import java.io.OutputStream;
				30
				31	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	32	* The TarInputStream reads a UNIX tar archive as an InputStream.
				33	* Methods are provided to position at each successive entry in
				34	* the archive, and then read each entry as a normal input stream
				35	* using read().
				36	*
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	37	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	38	public class TarInputStream extends FilterInputStream {
				39	private static final int SMALL_BUFFER_SIZE = 256;
				40	private static final int BUFFER_SIZE = 8 * 1024;
				41	private static final int LARGE_BUFFER_SIZE = 32 * 1024;
				42	private static final int BYTE_MASK = 0xFF;
				43
				44	// CheckStyle:VisibilityModifier OFF - bc
				45	protected boolean debug;
				46	protected boolean hasHitEOF;
				47	protected long entrySize;
				48	protected long entryOffset;
				49	protected byte[] readBuf;
				50	protected TarBuffer buffer;
				51	protected TarArchiveEntry currEntry;
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	52
				53	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	54	* This contents of this array is not used at all in this class,
				55	* it is only here to avoid repreated object creation during calls
				56	* to the no-arg read method.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	57	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	58	protected byte[] oneBuf;
				59
				60	// CheckStyle:VisibilityModifier ON
				61
				62	/**
				63	* Constructor for TarInputStream.
				64	* @param is the input stream to use
				65	*/
				66	public TarInputStream(InputStream is) {
				67	this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	68	}
				69
				70	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	71	* Constructor for TarInputStream.
				72	* @param is the input stream to use
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	73	* @param blockSize the block size to use
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	74	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	75	public TarInputStream(InputStream is, int blockSize) {
				76	this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	77	}
				78
				79	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	80	* Constructor for TarInputStream.
				81	* @param is the input stream to use
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	82	* @param blockSize the block size to use
				83	* @param recordSize the record size to use
				84	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	85	public TarInputStream(InputStream is, int blockSize, int recordSize) {
				86	super(is);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	87
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	88	this.buffer = new TarBuffer(is, blockSize, recordSize);
				89	this.readBuf = null;
				90	this.oneBuf = new byte[1];
				91	this.debug = false;
				92	this.hasHitEOF = false;
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	93	}
				94
				95	/**
				96	* Sets the debugging flag.
				97	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	98	* @param debug True to turn on debugging.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	99	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	100	public void setDebug(boolean debug) {
				101	this.debug = debug;
				102	buffer.setDebug(debug);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	103	}
				104
				105	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	106	* Closes this stream. Calls the TarBuffer's close() method.
				107	* @throws IOException on error
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	108	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	109	public void close() throws IOException {
				110	buffer.close();
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	111	}
				112
				113	/**
				114	* Get the record size being used by this stream's TarBuffer.
				115	*
				116	* @return The TarBuffer record size.
				117	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	118	public int getRecordSize() {
				119	return buffer.getRecordSize();
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	120	}
				121
				122	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	123	* Get the available data that can be read from the current
				124	* entry in the archive. This does not indicate how much data
				125	* is left in the entire archive, only in the current entry.
				126	* This value is determined from the entry's size header field
				127	* and the amount of data already read from the current entry.
				128	* Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
				129	* bytes are left in the current entry in the archive.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	130	*
				131	* @return The number of available bytes for the current entry.
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	132	* @throws IOException for signature
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	133	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	134	public int available() throws IOException {
				135	if (entrySize - entryOffset > Integer.MAX_VALUE) {
				136	return Integer.MAX_VALUE;
				137	}
				138	return (int) (entrySize - entryOffset);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	139	}
				140
				141	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	142	* Skip bytes in the input buffer. This skips bytes in the
				143	* current entry's data, not the entire archive, and will
				144	* stop at the end of the current entry's data if the number
				145	* to skip extends beyond that point.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	146	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	147	* @param numToSkip The number of bytes to skip.
				148	* @return the number actually skipped
				149	* @throws IOException on error
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	150	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	151	public long skip(long numToSkip) throws IOException {
				152	// REVIEW
				153	// This is horribly inefficient, but it ensures that we
				154	// properly skip over bytes via the TarBuffer...
				155	//
				156	byte[] skipBuf = new byte[BUFFER_SIZE];
				157	long skip = numToSkip;
				158	while (skip > 0) {
				159	int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
				160	int numRead = read(skipBuf, 0, realSkip);
				161	if (numRead == -1) {
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	162	break;
				163	}
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	164	skip -= numRead;
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	165	}
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	166	return (numToSkip - skip);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	167	}
				168
				169	/**
				170	* Since we do not support marking just yet, we return false.
				171	*
				172	* @return False.
				173	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	174	public boolean markSupported() {
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	175	return false;
				176	}
				177
				178	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	179	* Since we do not support marking just yet, we do nothing.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	180	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	181	* @param markLimit The limit to mark.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	182	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	183	public void mark(int markLimit) {
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	184	}
				185
				186	/**
				187	* Since we do not support marking just yet, we do nothing.
				188	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	189	public void reset() {
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	190	}
				191
				192	/**
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	193	* Get the next entry in this tar archive. This will skip
				194	* over any remaining data in the current entry, if there
				195	* is one, and place the input stream at the header of the
				196	* next entry, and read the header and instantiate a new
				197	* TarEntry from the header bytes and return that entry.
				198	* If there are no more entries in the archive, null will
				199	* be returned to indicate that the end of the archive has
				200	* been reached.
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	201	*
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	202	* @return The next TarEntry in the archive, or null.
				203	* @throws IOException on error
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	204	*/
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	205	public TarArchiveEntry getNextEntry() throws IOException {
				206	if (hasHitEOF) {
				207	return null;
				208	}
				209
				210	if (currEntry != null) {
				211	long numToSkip = entrySize - entryOffset;
				212
				213	if (debug) {
				214	System.err.println("TarInputStream: SKIP currENTRY '"
				215	+ currEntry.getName() + "' SZ "
				216	+ entrySize + " OFF "
				217	+ entryOffset + " skipping "
				218	+ numToSkip + " bytes");
				219	}
				220
				221	if (numToSkip > 0) {
				222	skip(numToSkip);
				223	}
				224
				225	readBuf = null;
				226	}
				227
				228	byte[] headerBuf = buffer.readRecord();
				229
				230	if (headerBuf == null) {
				231	if (debug) {
				232	System.err.println("READ NULL RECORD");
				233	}
				234	hasHitEOF = true;
				235	} else if (buffer.isEOFRecord(headerBuf)) {
				236	if (debug) {
				237	System.err.println("READ EOF RECORD");
				238	}
				239	hasHitEOF = true;
				240	}
				241
				242	if (hasHitEOF) {
				243	currEntry = null;
				244	} else {
				245	currEntry = new TarArchiveEntry(headerBuf);
				246
				247	if (debug) {
				248	System.err.println("TarInputStream: SET CURRENTRY '"
				249	+ currEntry.getName()
				250	+ "' size = "
				251	+ currEntry.getSize());
				252	}
				253
				254	entryOffset = 0;
				255
				256	entrySize = currEntry.getSize();
				257	}
				258
				259	if (currEntry != null && currEntry.isGNULongNameEntry()) {
				260	// read in the name
				261	StringBuffer longName = new StringBuffer();
				262	byte[] buf = new byte[SMALL_BUFFER_SIZE];
				263	int length = 0;
				264	while ((length = read(buf)) >= 0) {
				265	longName.append(new String(buf, 0, length));
				266	}
				267	getNextEntry();
				268	if (currEntry == null) {
				269	// Bugzilla: 40334
				270	// Malformed tar file - long entry name not followed by entry
				271	return null;
				272	}
				273	// remove trailing null terminator
				274	if (longName.length() > 0
				275	&& longName.charAt(longName.length() - 1) == 0) {
				276	longName.deleteCharAt(longName.length() - 1);
				277	}
				278	currEntry.setName(longName.toString());
				279	}
				280
				281	return currEntry;
				282	}
				283
				284	/**
				285	* Reads a byte from the current tar archive entry.
				286	*
				287	* This method simply calls read( byte[], int, int ).
				288	*
				289	* @return The byte read, or -1 at EOF.
				290	* @throws IOException on error
				291	*/
				292	public int read() throws IOException {
				293	int num = read(oneBuf, 0, 1);
				294	return num == -1 ? -1 : ((int) oneBuf[0]) & BYTE_MASK;
				295	}
				296
				297	/**
				298	* Reads bytes from the current tar archive entry.
				299	*
				300	* This method is aware of the boundaries of the current
				301	* entry in the archive and will deal with them as if they
				302	* were this stream's start and EOF.
				303	*
				304	* @param buf The buffer into which to place bytes read.
				305	* @param offset The offset at which to place bytes read.
				306	* @param numToRead The number of bytes to read.
				307	* @return The number of bytes read, or -1 at EOF.
				308	* @throws IOException on error
				309	*/
				310	public int read(byte[] buf, int offset, int numToRead) throws IOException {
				311	int totalRead = 0;
				312
				313	if (entryOffset >= entrySize) {
				314	return -1;
				315	}
				316
				317	if ((numToRead + entryOffset) > entrySize) {
				318	numToRead = (int) (entrySize - entryOffset);
				319	}
				320
				321	if (readBuf != null) {
				322	int sz = (numToRead > readBuf.length) ? readBuf.length
				323	: numToRead;
				324
				325	System.arraycopy(readBuf, 0, buf, offset, sz);
				326
				327	if (sz >= readBuf.length) {
				328	readBuf = null;
				329	} else {
				330	int newLen = readBuf.length - sz;
				331	byte[] newBuf = new byte[newLen];
				332
				333	System.arraycopy(readBuf, sz, newBuf, 0, newLen);
				334
				335	readBuf = newBuf;
				336	}
				337
				338	totalRead += sz;
				339	numToRead -= sz;
				340	offset += sz;
				341	}
				342
				343	while (numToRead > 0) {
				344	byte[] rec = buffer.readRecord();
				345
				346	if (rec == null) {
				347	// Unexpected EOF!
				348	throw new IOException("unexpected EOF with " + numToRead
				349	+ " bytes unread");
				350	}
				351
				352	int sz = numToRead;
				353	int recLen = rec.length;
				354
				355	if (recLen > sz) {
				356	System.arraycopy(rec, 0, buf, offset, sz);
				357
				358	readBuf = new byte[recLen - sz];
				359
				360	System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
				361	} else {
				362	sz = recLen;
				363
				364	System.arraycopy(rec, 0, buf, offset, recLen);
				365	}
				366
				367	totalRead += sz;
				368	numToRead -= sz;
				369	offset += sz;
				370	}
				371
				372	entryOffset += totalRead;
				373
				374	return totalRead;
				375	}
				376
				377	/**
				378	* Copies the contents of the current tar archive entry directly into
				379	* an output stream.
				380	*
				381	* @param out The OutputStream into which to write the entry's data.
				382	* @throws IOException on error
				383	*/
				384	public void copyEntryContents(OutputStream out) throws IOException {
				385	byte[] buf = new byte[LARGE_BUFFER_SIZE];
				386
				387	while (true) {
				388	int numRead = read(buf, 0, buf.length);
				389
				390	if (numRead == -1) {
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	391	break;
				392	}
				393
Torsten Curdt	46ad24d	2009-01-08 11:09:25 +0000	[diff] [blame^]	394	out.write(buf, 0, numRead);
Torsten Curdt	ca16539	2008-07-10 10:17:44 +0000	[diff] [blame]	395	}
				396	}
				397	}