blob: 735e7f86639eeb7d3ee20fcf5f162dd2592aec44 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

31/**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000032 * The TarInputStream reads a UNIX tar archive as an InputStream.
33 * methods are provided to position at each successive entry in
34 * the archive, and the read each entry as a normal input stream
35 * using read().
36 *
Torsten Curdtca165392008-07-10 10:17:44 +000037 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000038public class TarInputStream extends FilterInputStream {
39 private static final int SMALL_BUFFER_SIZE = 256;
40 private static final int BUFFER_SIZE = 8 * 1024;
41 private static final int LARGE_BUFFER_SIZE = 32 * 1024;
42 private static final int BYTE_MASK = 0xFF;
43
44 // CheckStyle:VisibilityModifier OFF - bc
45 protected boolean debug;
46 protected boolean hasHitEOF;
47 protected long entrySize;
48 protected long entryOffset;
49 protected byte[] readBuf;
50 protected TarBuffer buffer;
51 protected TarArchiveEntry currEntry;
Torsten Curdtca165392008-07-10 10:17:44 +000052
53 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000054 * This contents of this array is not used at all in this class,
55 * it is only here to avoid repreated object creation during calls
56 * to the no-arg read method.
Torsten Curdtca165392008-07-10 10:17:44 +000057 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000058 protected byte[] oneBuf;
59
60 // CheckStyle:VisibilityModifier ON
61
62 /**
63 * Constructor for TarInputStream.
64 * @param is the input stream to use
65 */
66 public TarInputStream(InputStream is) {
67 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdtca165392008-07-10 10:17:44 +000068 }
69
70 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000071 * Constructor for TarInputStream.
72 * @param is the input stream to use
Torsten Curdtca165392008-07-10 10:17:44 +000073 * @param blockSize the block size to use
Torsten Curdtca165392008-07-10 10:17:44 +000074 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000075 public TarInputStream(InputStream is, int blockSize) {
76 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
Torsten Curdtca165392008-07-10 10:17:44 +000077 }
78
79 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +000080 * Constructor for TarInputStream.
81 * @param is the input stream to use
Torsten Curdtca165392008-07-10 10:17:44 +000082 * @param blockSize the block size to use
83 * @param recordSize the record size to use
84 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +000085 public TarInputStream(InputStream is, int blockSize, int recordSize) {
86 super(is);
Torsten Curdtca165392008-07-10 10:17:44 +000087
Torsten Curdt46ad24d2009-01-08 11:09:25 +000088 this.buffer = new TarBuffer(is, blockSize, recordSize);
89 this.readBuf = null;
90 this.oneBuf = new byte[1];
91 this.debug = false;
92 this.hasHitEOF = false;
Torsten Curdtca165392008-07-10 10:17:44 +000093 }
94
95 /**
96 * Sets the debugging flag.
97 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +000098 * @param debug True to turn on debugging.
Torsten Curdtca165392008-07-10 10:17:44 +000099 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000100 public void setDebug(boolean debug) {
101 this.debug = debug;
102 buffer.setDebug(debug);
Torsten Curdtca165392008-07-10 10:17:44 +0000103 }
104
105 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000106 * Closes this stream. Calls the TarBuffer's close() method.
107 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000108 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000109 public void close() throws IOException {
110 buffer.close();
Torsten Curdtca165392008-07-10 10:17:44 +0000111 }
112
113 /**
114 * Get the record size being used by this stream's TarBuffer.
115 *
116 * @return The TarBuffer record size.
117 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000118 public int getRecordSize() {
119 return buffer.getRecordSize();
Torsten Curdtca165392008-07-10 10:17:44 +0000120 }
121
122 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000123 * Get the available data that can be read from the current
124 * entry in the archive. This does not indicate how much data
125 * is left in the entire archive, only in the current entry.
126 * This value is determined from the entry's size header field
127 * and the amount of data already read from the current entry.
128 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
129 * bytes are left in the current entry in the archive.
Torsten Curdtca165392008-07-10 10:17:44 +0000130 *
131 * @return The number of available bytes for the current entry.
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000132 * @throws IOException for signature
Torsten Curdtca165392008-07-10 10:17:44 +0000133 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000134 public int available() throws IOException {
135 if (entrySize - entryOffset > Integer.MAX_VALUE) {
136 return Integer.MAX_VALUE;
137 }
138 return (int) (entrySize - entryOffset);
Torsten Curdtca165392008-07-10 10:17:44 +0000139 }
140
141 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000142 * Skip bytes in the input buffer. This skips bytes in the
143 * current entry's data, not the entire archive, and will
144 * stop at the end of the current entry's data if the number
145 * to skip extends beyond that point.
Torsten Curdtca165392008-07-10 10:17:44 +0000146 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000147 * @param numToSkip The number of bytes to skip.
148 * @return the number actually skipped
149 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000150 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000151 public long skip(long numToSkip) throws IOException {
152 // REVIEW
153 // This is horribly inefficient, but it ensures that we
154 // properly skip over bytes via the TarBuffer...
155 //
156 byte[] skipBuf = new byte[BUFFER_SIZE];
157 long skip = numToSkip;
158 while (skip > 0) {
159 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
160 int numRead = read(skipBuf, 0, realSkip);
161 if (numRead == -1) {
Torsten Curdtca165392008-07-10 10:17:44 +0000162 break;
163 }
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000164 skip -= numRead;
Torsten Curdtca165392008-07-10 10:17:44 +0000165 }
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000166 return (numToSkip - skip);
Torsten Curdtca165392008-07-10 10:17:44 +0000167 }
168
169 /**
170 * Since we do not support marking just yet, we return false.
171 *
172 * @return False.
173 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000174 public boolean markSupported() {
Torsten Curdtca165392008-07-10 10:17:44 +0000175 return false;
176 }
177
178 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000179 * Since we do not support marking just yet, we do nothing.
Torsten Curdtca165392008-07-10 10:17:44 +0000180 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000181 * @param markLimit The limit to mark.
Torsten Curdtca165392008-07-10 10:17:44 +0000182 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000183 public void mark(int markLimit) {
Torsten Curdtca165392008-07-10 10:17:44 +0000184 }
185
186 /**
187 * Since we do not support marking just yet, we do nothing.
188 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000189 public void reset() {
Torsten Curdtca165392008-07-10 10:17:44 +0000190 }
191
192 /**
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000193 * Get the next entry in this tar archive. This will skip
194 * over any remaining data in the current entry, if there
195 * is one, and place the input stream at the header of the
196 * next entry, and read the header and instantiate a new
197 * TarEntry from the header bytes and return that entry.
198 * If there are no more entries in the archive, null will
199 * be returned to indicate that the end of the archive has
200 * been reached.
Torsten Curdtca165392008-07-10 10:17:44 +0000201 *
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000202 * @return The next TarEntry in the archive, or null.
203 * @throws IOException on error
Torsten Curdtca165392008-07-10 10:17:44 +0000204 */
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000205 public TarArchiveEntry getNextEntry() throws IOException {
206 if (hasHitEOF) {
207 return null;
208 }
209
210 if (currEntry != null) {
211 long numToSkip = entrySize - entryOffset;
212
213 if (debug) {
214 System.err.println("TarInputStream: SKIP currENTRY '"
215 + currEntry.getName() + "' SZ "
216 + entrySize + " OFF "
217 + entryOffset + " skipping "
218 + numToSkip + " bytes");
219 }
220
Stefan Bodewig5e5804c2009-02-05 12:45:23 +0000221 while (numToSkip > 0) {
222 long skipped = skip(numToSkip);
223 if (skipped <= 0) {
224 throw new RuntimeException("failed to skip current tar"
225 + " entry");
226 }
227 numToSkip -= skipped;
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000228 }
229
230 readBuf = null;
231 }
232
233 byte[] headerBuf = buffer.readRecord();
234
235 if (headerBuf == null) {
236 if (debug) {
237 System.err.println("READ NULL RECORD");
238 }
239 hasHitEOF = true;
240 } else if (buffer.isEOFRecord(headerBuf)) {
241 if (debug) {
242 System.err.println("READ EOF RECORD");
243 }
244 hasHitEOF = true;
245 }
246
247 if (hasHitEOF) {
248 currEntry = null;
249 } else {
250 currEntry = new TarArchiveEntry(headerBuf);
251
252 if (debug) {
253 System.err.println("TarInputStream: SET CURRENTRY '"
254 + currEntry.getName()
255 + "' size = "
256 + currEntry.getSize());
257 }
258
259 entryOffset = 0;
260
261 entrySize = currEntry.getSize();
262 }
263
264 if (currEntry != null && currEntry.isGNULongNameEntry()) {
265 // read in the name
266 StringBuffer longName = new StringBuffer();
267 byte[] buf = new byte[SMALL_BUFFER_SIZE];
268 int length = 0;
269 while ((length = read(buf)) >= 0) {
270 longName.append(new String(buf, 0, length));
271 }
272 getNextEntry();
273 if (currEntry == null) {
274 // Bugzilla: 40334
275 // Malformed tar file - long entry name not followed by entry
276 return null;
277 }
278 // remove trailing null terminator
279 if (longName.length() > 0
280 && longName.charAt(longName.length() - 1) == 0) {
281 longName.deleteCharAt(longName.length() - 1);
282 }
283 currEntry.setName(longName.toString());
284 }
285
286 return currEntry;
287 }
288
289 /**
290 * Reads a byte from the current tar archive entry.
291 *
292 * This method simply calls read( byte[], int, int ).
293 *
294 * @return The byte read, or -1 at EOF.
295 * @throws IOException on error
296 */
297 public int read() throws IOException {
298 int num = read(oneBuf, 0, 1);
299 return num == -1 ? -1 : ((int) oneBuf[0]) & BYTE_MASK;
300 }
301
302 /**
303 * Reads bytes from the current tar archive entry.
304 *
305 * This method is aware of the boundaries of the current
306 * entry in the archive and will deal with them as if they
307 * were this stream's start and EOF.
308 *
309 * @param buf The buffer into which to place bytes read.
310 * @param offset The offset at which to place bytes read.
311 * @param numToRead The number of bytes to read.
312 * @return The number of bytes read, or -1 at EOF.
313 * @throws IOException on error
314 */
315 public int read(byte[] buf, int offset, int numToRead) throws IOException {
316 int totalRead = 0;
317
318 if (entryOffset >= entrySize) {
319 return -1;
320 }
321
322 if ((numToRead + entryOffset) > entrySize) {
323 numToRead = (int) (entrySize - entryOffset);
324 }
325
326 if (readBuf != null) {
327 int sz = (numToRead > readBuf.length) ? readBuf.length
328 : numToRead;
329
330 System.arraycopy(readBuf, 0, buf, offset, sz);
331
332 if (sz >= readBuf.length) {
333 readBuf = null;
334 } else {
335 int newLen = readBuf.length - sz;
336 byte[] newBuf = new byte[newLen];
337
338 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
339
340 readBuf = newBuf;
341 }
342
343 totalRead += sz;
344 numToRead -= sz;
345 offset += sz;
346 }
347
348 while (numToRead > 0) {
349 byte[] rec = buffer.readRecord();
350
351 if (rec == null) {
352 // Unexpected EOF!
353 throw new IOException("unexpected EOF with " + numToRead
354 + " bytes unread");
355 }
356
357 int sz = numToRead;
358 int recLen = rec.length;
359
360 if (recLen > sz) {
361 System.arraycopy(rec, 0, buf, offset, sz);
362
363 readBuf = new byte[recLen - sz];
364
365 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
366 } else {
367 sz = recLen;
368
369 System.arraycopy(rec, 0, buf, offset, recLen);
370 }
371
372 totalRead += sz;
373 numToRead -= sz;
374 offset += sz;
375 }
376
377 entryOffset += totalRead;
378
379 return totalRead;
380 }
381
382 /**
383 * Copies the contents of the current tar archive entry directly into
384 * an output stream.
385 *
386 * @param out The OutputStream into which to write the entry's data.
387 * @throws IOException on error
388 */
389 public void copyEntryContents(OutputStream out) throws IOException {
390 byte[] buf = new byte[LARGE_BUFFER_SIZE];
391
392 while (true) {
393 int numRead = read(buf, 0, buf.length);
394
395 if (numRead == -1) {
Torsten Curdtca165392008-07-10 10:17:44 +0000396 break;
397 }
398
Torsten Curdt46ad24d2009-01-08 11:09:25 +0000399 out.write(buf, 0, numRead);
Torsten Curdtca165392008-07-10 10:17:44 +0000400 }
401 }
402}