blob: bf05051a4fe961308be7e72b5400567d6839e2fe [file] [log] [blame]
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000018
/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */
23
Torsten Curdtca165392008-07-10 10:17:44 +000024package org.apache.commons.compress.archivers.tar;
25
import java.io.ByteArrayOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
30
31/**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000032 * The TarInputStream reads a UNIX tar archive as an InputStream.
33 * methods are provided to position at each successive entry in
34 * the archive, and the read each entry as a normal input stream
35 * using read().
36 *
Torsten Curdtca165392008-07-10 10:17:44 +000037 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000038public class TarInputStream extends FilterInputStream {
39 private static final int SMALL_BUFFER_SIZE = 256;
40 private static final int BUFFER_SIZE = 8 * 1024;
41 private static final int LARGE_BUFFER_SIZE = 32 * 1024;
42 private static final int BYTE_MASK = 0xFF;
43
44 // CheckStyle:VisibilityModifier OFF - bc
45 protected boolean debug;
46 protected boolean hasHitEOF;
47 protected long entrySize;
48 protected long entryOffset;
49 protected byte[] readBuf;
50 protected TarBuffer buffer;
51 protected TarArchiveEntry currEntry;
Torsten Curdtca165392008-07-10 10:17:44 +000052
53 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000054 * This contents of this array is not used at all in this class,
55 * it is only here to avoid repreated object creation during calls
56 * to the no-arg read method.
Torsten Curdtca165392008-07-10 10:17:44 +000057 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000058 protected byte[] oneBuf;
59
60 // CheckStyle:VisibilityModifier ON
61
62 /**
63 * Constructor for TarInputStream.
64 * @param is the input stream to use
65 */
66 public TarInputStream(InputStream is) {
67 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdtca165392008-07-10 10:17:44 +000068 }
69
70 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000071 * Constructor for TarInputStream.
72 * @param is the input stream to use
Torsten Curdtca165392008-07-10 10:17:44 +000073 * @param blockSize the block size to use
Torsten Curdtca165392008-07-10 10:17:44 +000074 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000075 public TarInputStream(InputStream is, int blockSize) {
76 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdtca165392008-07-10 10:17:44 +000077 }
78
79 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000080 * Constructor for TarInputStream.
81 * @param is the input stream to use
Torsten Curdtca165392008-07-10 10:17:44 +000082 * @param blockSize the block size to use
83 * @param recordSize the record size to use
84 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000085 public TarInputStream(InputStream is, int blockSize, int recordSize) {
86 super(is);
Torsten Curdtca165392008-07-10 10:17:44 +000087
Torsten Curdt46ad24d2009-01-08 11:09:25 +000088 this.buffer = new TarBuffer(is, blockSize, recordSize);
89 this.readBuf = null;
90 this.oneBuf = new byte[1];
91 this.debug = false;
92 this.hasHitEOF = false;
Torsten Curdtca165392008-07-10 10:17:44 +000093 }
94
95 /**
96 * Sets the debugging flag.
97 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +000098 * @param debug True to turn on debugging.
Torsten Curdtca165392008-07-10 10:17:44 +000099 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000100 public void setDebug(boolean debug) {
101 this.debug = debug;
102 buffer.setDebug(debug);
Torsten Curdtca165392008-07-10 10:17:44 +0000103 }
104
105 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000106 * Closes this stream. Calls the TarBuffer's close() method.
107 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000108 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000109 public void close() throws IOException {
110 buffer.close();
Torsten Curdtca165392008-07-10 10:17:44 +0000111 }
112
113 /**
114 * Get the record size being used by this stream's TarBuffer.
115 *
116 * @return The TarBuffer record size.
117 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000118 public int getRecordSize() {
119 return buffer.getRecordSize();
Torsten Curdtca165392008-07-10 10:17:44 +0000120 }
121
122 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000123 * Get the available data that can be read from the current
124 * entry in the archive. This does not indicate how much data
125 * is left in the entire archive, only in the current entry.
126 * This value is determined from the entry's size header field
127 * and the amount of data already read from the current entry.
128 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
129 * bytes are left in the current entry in the archive.
Torsten Curdtca165392008-07-10 10:17:44 +0000130 *
131 * @return The number of available bytes for the current entry.
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000132 * @throws IOException for signature
Torsten Curdtca165392008-07-10 10:17:44 +0000133 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000134 public int available() throws IOException {
135 if (entrySize - entryOffset > Integer.MAX_VALUE) {
136 return Integer.MAX_VALUE;
137 }
138 return (int) (entrySize - entryOffset);
Torsten Curdtca165392008-07-10 10:17:44 +0000139 }
140
141 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000142 * Skip bytes in the input buffer. This skips bytes in the
143 * current entry's data, not the entire archive, and will
144 * stop at the end of the current entry's data if the number
145 * to skip extends beyond that point.
Torsten Curdtca165392008-07-10 10:17:44 +0000146 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000147 * @param numToSkip The number of bytes to skip.
148 * @return the number actually skipped
149 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000150 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000151 public long skip(long numToSkip) throws IOException {
152 // REVIEW
153 // This is horribly inefficient, but it ensures that we
154 // properly skip over bytes via the TarBuffer...
155 //
156 byte[] skipBuf = new byte[BUFFER_SIZE];
157 long skip = numToSkip;
158 while (skip > 0) {
159 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
160 int numRead = read(skipBuf, 0, realSkip);
161 if (numRead == -1) {
Torsten Curdtca165392008-07-10 10:17:44 +0000162 break;
163 }
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000164 skip -= numRead;
Torsten Curdtca165392008-07-10 10:17:44 +0000165 }
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000166 return (numToSkip - skip);
Torsten Curdtca165392008-07-10 10:17:44 +0000167 }
168
169 /**
170 * Since we do not support marking just yet, we return false.
171 *
172 * @return False.
173 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000174 public boolean markSupported() {
Torsten Curdtca165392008-07-10 10:17:44 +0000175 return false;
176 }
177
178 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000179 * Since we do not support marking just yet, we do nothing.
Torsten Curdtca165392008-07-10 10:17:44 +0000180 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000181 * @param markLimit The limit to mark.
Torsten Curdtca165392008-07-10 10:17:44 +0000182 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000183 public void mark(int markLimit) {
Torsten Curdtca165392008-07-10 10:17:44 +0000184 }
185
186 /**
187 * Since we do not support marking just yet, we do nothing.
188 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000189 public void reset() {
Torsten Curdtca165392008-07-10 10:17:44 +0000190 }
191
192 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000193 * Get the next entry in this tar archive. This will skip
194 * over any remaining data in the current entry, if there
195 * is one, and place the input stream at the header of the
196 * next entry, and read the header and instantiate a new
197 * TarEntry from the header bytes and return that entry.
198 * If there are no more entries in the archive, null will
199 * be returned to indicate that the end of the archive has
200 * been reached.
Torsten Curdtca165392008-07-10 10:17:44 +0000201 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000202 * @return The next TarEntry in the archive, or null.
203 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000204 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000205 public TarArchiveEntry getNextEntry() throws IOException {
206 if (hasHitEOF) {
207 return null;
208 }
209
210 if (currEntry != null) {
211 long numToSkip = entrySize - entryOffset;
212
213 if (debug) {
214 System.err.println("TarInputStream: SKIP currENTRY '"
215 + currEntry.getName() + "' SZ "
216 + entrySize + " OFF "
217 + entryOffset + " skipping "
218 + numToSkip + " bytes");
219 }
220
221 if (numToSkip > 0) {
222 skip(numToSkip);
223 }
224
225 readBuf = null;
226 }
227
228 byte[] headerBuf = buffer.readRecord();
229
230 if (headerBuf == null) {
231 if (debug) {
232 System.err.println("READ NULL RECORD");
233 }
234 hasHitEOF = true;
235 } else if (buffer.isEOFRecord(headerBuf)) {
236 if (debug) {
237 System.err.println("READ EOF RECORD");
238 }
239 hasHitEOF = true;
240 }
241
242 if (hasHitEOF) {
243 currEntry = null;
244 } else {
245 currEntry = new TarArchiveEntry(headerBuf);
246
247 if (debug) {
248 System.err.println("TarInputStream: SET CURRENTRY '"
249 + currEntry.getName()
250 + "' size = "
251 + currEntry.getSize());
252 }
253
254 entryOffset = 0;
255
256 entrySize = currEntry.getSize();
257 }
258
259 if (currEntry != null && currEntry.isGNULongNameEntry()) {
260 // read in the name
261 StringBuffer longName = new StringBuffer();
262 byte[] buf = new byte[SMALL_BUFFER_SIZE];
263 int length = 0;
264 while ((length = read(buf)) >= 0) {
265 longName.append(new String(buf, 0, length));
266 }
267 getNextEntry();
268 if (currEntry == null) {
269 // Bugzilla: 40334
270 // Malformed tar file - long entry name not followed by entry
271 return null;
272 }
273 // remove trailing null terminator
274 if (longName.length() > 0
275 && longName.charAt(longName.length() - 1) == 0) {
276 longName.deleteCharAt(longName.length() - 1);
277 }
278 currEntry.setName(longName.toString());
279 }
280
281 return currEntry;
282 }
283
284 /**
285 * Reads a byte from the current tar archive entry.
286 *
287 * This method simply calls read( byte[], int, int ).
288 *
289 * @return The byte read, or -1 at EOF.
290 * @throws IOException on error
291 */
292 public int read() throws IOException {
293 int num = read(oneBuf, 0, 1);
294 return num == -1 ? -1 : ((int) oneBuf[0]) & BYTE_MASK;
295 }
296
297 /**
298 * Reads bytes from the current tar archive entry.
299 *
300 * This method is aware of the boundaries of the current
301 * entry in the archive and will deal with them as if they
302 * were this stream's start and EOF.
303 *
304 * @param buf The buffer into which to place bytes read.
305 * @param offset The offset at which to place bytes read.
306 * @param numToRead The number of bytes to read.
307 * @return The number of bytes read, or -1 at EOF.
308 * @throws IOException on error
309 */
310 public int read(byte[] buf, int offset, int numToRead) throws IOException {
311 int totalRead = 0;
312
313 if (entryOffset >= entrySize) {
314 return -1;
315 }
316
317 if ((numToRead + entryOffset) > entrySize) {
318 numToRead = (int) (entrySize - entryOffset);
319 }
320
321 if (readBuf != null) {
322 int sz = (numToRead > readBuf.length) ? readBuf.length
323 : numToRead;
324
325 System.arraycopy(readBuf, 0, buf, offset, sz);
326
327 if (sz >= readBuf.length) {
328 readBuf = null;
329 } else {
330 int newLen = readBuf.length - sz;
331 byte[] newBuf = new byte[newLen];
332
333 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
334
335 readBuf = newBuf;
336 }
337
338 totalRead += sz;
339 numToRead -= sz;
340 offset += sz;
341 }
342
343 while (numToRead > 0) {
344 byte[] rec = buffer.readRecord();
345
346 if (rec == null) {
347 // Unexpected EOF!
348 throw new IOException("unexpected EOF with " + numToRead
349 + " bytes unread");
350 }
351
352 int sz = numToRead;
353 int recLen = rec.length;
354
355 if (recLen > sz) {
356 System.arraycopy(rec, 0, buf, offset, sz);
357
358 readBuf = new byte[recLen - sz];
359
360 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
361 } else {
362 sz = recLen;
363
364 System.arraycopy(rec, 0, buf, offset, recLen);
365 }
366
367 totalRead += sz;
368 numToRead -= sz;
369 offset += sz;
370 }
371
372 entryOffset += totalRead;
373
374 return totalRead;
375 }
376
377 /**
378 * Copies the contents of the current tar archive entry directly into
379 * an output stream.
380 *
381 * @param out The OutputStream into which to write the entry's data.
382 * @throws IOException on error
383 */
384 public void copyEntryContents(OutputStream out) throws IOException {
385 byte[] buf = new byte[LARGE_BUFFER_SIZE];
386
387 while (true) {
388 int numRead = read(buf, 0, buf.length);
389
390 if (numRead == -1) {
Torsten Curdtca165392008-07-10 10:17:44 +0000391 break;
392 }
393
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000394 out.write(buf, 0, numRead);
Torsten Curdtca165392008-07-10 10:17:44 +0000395 }
396 }
397}