/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */

package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;

33/**
34 * The TarInputStream reads a UNIX tar archive as an InputStream.
35 * methods are provided to position at each successive entry in
36 * the archive, and the read each entry as a normal input stream
37 * using read().
Sebastian Bazley99870ef2009-03-28 00:04:36 +000038 * @NotThreadSafe
Stefan Bodewig0a986c62009-02-12 03:13:10 +000039 */
40public class TarArchiveInputStream extends ArchiveInputStream {
41 private static final int SMALL_BUFFER_SIZE = 256;
42 private static final int BUFFER_SIZE = 8 * 1024;
43 private static final int LARGE_BUFFER_SIZE = 32 * 1024;
Stefan Bodewig0a986c62009-02-12 03:13:10 +000044
Stefan Bodewig41f4a202009-03-20 15:42:37 +000045 private boolean debug;
46 private boolean hasHitEOF;
47 private long entrySize;
48 private long entryOffset;
49 private byte[] readBuf;
50 protected final TarBuffer buffer;
51 private TarArchiveEntry currEntry;
Stefan Bodewig0a986c62009-02-12 03:13:10 +000052
53 /**
Stefan Bodewig0a986c62009-02-12 03:13:10 +000054 * Constructor for TarInputStream.
55 * @param is the input stream to use
56 */
57 public TarArchiveInputStream(InputStream is) {
58 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
59 }
60
61 /**
62 * Constructor for TarInputStream.
63 * @param is the input stream to use
64 * @param blockSize the block size to use
65 */
66 public TarArchiveInputStream(InputStream is, int blockSize) {
67 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
68 }
69
70 /**
71 * Constructor for TarInputStream.
72 * @param is the input stream to use
73 * @param blockSize the block size to use
74 * @param recordSize the record size to use
75 */
76 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
Stefan Bodewig0a986c62009-02-12 03:13:10 +000077 this.buffer = new TarBuffer(is, blockSize, recordSize);
78 this.readBuf = null;
Stefan Bodewig0a986c62009-02-12 03:13:10 +000079 this.debug = false;
80 this.hasHitEOF = false;
81 }
82
83 /**
84 * Sets the debugging flag.
85 *
86 * @param debug True to turn on debugging.
87 */
88 public void setDebug(boolean debug) {
89 this.debug = debug;
90 buffer.setDebug(debug);
91 }
92
93 /**
94 * Closes this stream. Calls the TarBuffer's close() method.
95 * @throws IOException on error
96 */
97 public void close() throws IOException {
98 buffer.close();
99 }
100
101 /**
102 * Get the record size being used by this stream's TarBuffer.
103 *
104 * @return The TarBuffer record size.
105 */
106 public int getRecordSize() {
107 return buffer.getRecordSize();
108 }
109
110 /**
111 * Get the available data that can be read from the current
112 * entry in the archive. This does not indicate how much data
113 * is left in the entire archive, only in the current entry.
114 * This value is determined from the entry's size header field
115 * and the amount of data already read from the current entry.
116 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
117 * bytes are left in the current entry in the archive.
118 *
119 * @return The number of available bytes for the current entry.
120 * @throws IOException for signature
121 */
122 public int available() throws IOException {
123 if (entrySize - entryOffset > Integer.MAX_VALUE) {
124 return Integer.MAX_VALUE;
125 }
126 return (int) (entrySize - entryOffset);
127 }
128
129 /**
130 * Skip bytes in the input buffer. This skips bytes in the
131 * current entry's data, not the entire archive, and will
132 * stop at the end of the current entry's data if the number
133 * to skip extends beyond that point.
134 *
135 * @param numToSkip The number of bytes to skip.
136 * @return the number actually skipped
137 * @throws IOException on error
138 */
139 public long skip(long numToSkip) throws IOException {
140 // REVIEW
141 // This is horribly inefficient, but it ensures that we
142 // properly skip over bytes via the TarBuffer...
143 //
144 byte[] skipBuf = new byte[BUFFER_SIZE];
145 long skip = numToSkip;
146 while (skip > 0) {
147 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
148 int numRead = read(skipBuf, 0, realSkip);
149 if (numRead == -1) {
150 break;
151 }
152 skip -= numRead;
153 }
154 return (numToSkip - skip);
155 }
156
157 /**
158 * Since we do not support marking just yet, we do nothing.
159 */
160 public void reset() {
161 }
162
163 /**
164 * Get the next entry in this tar archive. This will skip
165 * over any remaining data in the current entry, if there
166 * is one, and place the input stream at the header of the
167 * next entry, and read the header and instantiate a new
168 * TarEntry from the header bytes and return that entry.
169 * If there are no more entries in the archive, null will
170 * be returned to indicate that the end of the archive has
171 * been reached.
172 *
173 * @return The next TarEntry in the archive, or null.
174 * @throws IOException on error
175 */
176 public TarArchiveEntry getNextTarEntry() throws IOException {
177 if (hasHitEOF) {
178 return null;
179 }
180
181 if (currEntry != null) {
182 long numToSkip = entrySize - entryOffset;
183
184 if (debug) {
185 System.err.println("TarInputStream: SKIP currENTRY '"
186 + currEntry.getName() + "' SZ "
187 + entrySize + " OFF "
188 + entryOffset + " skipping "
189 + numToSkip + " bytes");
190 }
191
192 while (numToSkip > 0) {
193 long skipped = skip(numToSkip);
194 if (skipped <= 0) {
195 throw new RuntimeException("failed to skip current tar"
196 + " entry");
197 }
198 numToSkip -= skipped;
199 }
200
201 readBuf = null;
202 }
203
204 byte[] headerBuf = buffer.readRecord();
205
206 if (headerBuf == null) {
207 if (debug) {
208 System.err.println("READ NULL RECORD");
209 }
210 hasHitEOF = true;
211 } else if (buffer.isEOFRecord(headerBuf)) {
212 if (debug) {
213 System.err.println("READ EOF RECORD");
214 }
215 hasHitEOF = true;
216 }
217
218 if (hasHitEOF) {
219 currEntry = null;
220 } else {
221 currEntry = new TarArchiveEntry(headerBuf);
222
223 if (debug) {
224 System.err.println("TarInputStream: SET CURRENTRY '"
225 + currEntry.getName()
226 + "' size = "
227 + currEntry.getSize());
228 }
229
230 entryOffset = 0;
231
232 entrySize = currEntry.getSize();
233 }
234
235 if (currEntry != null && currEntry.isGNULongNameEntry()) {
236 // read in the name
237 StringBuffer longName = new StringBuffer();
238 byte[] buf = new byte[SMALL_BUFFER_SIZE];
239 int length = 0;
240 while ((length = read(buf)) >= 0) {
241 longName.append(new String(buf, 0, length));
242 }
243 getNextEntry();
244 if (currEntry == null) {
245 // Bugzilla: 40334
246 // Malformed tar file - long entry name not followed by entry
247 return null;
248 }
249 // remove trailing null terminator
250 if (longName.length() > 0
251 && longName.charAt(longName.length() - 1) == 0) {
252 longName.deleteCharAt(longName.length() - 1);
253 }
254 currEntry.setName(longName.toString());
255 }
256
257 return currEntry;
258 }
259
260 public ArchiveEntry getNextEntry() throws IOException {
261 return getNextTarEntry();
262 }
263
264 /**
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000265 * Reads bytes from the current tar archive entry.
266 *
267 * This method is aware of the boundaries of the current
268 * entry in the archive and will deal with them as if they
269 * were this stream's start and EOF.
270 *
271 * @param buf The buffer into which to place bytes read.
272 * @param offset The offset at which to place bytes read.
273 * @param numToRead The number of bytes to read.
274 * @return The number of bytes read, or -1 at EOF.
275 * @throws IOException on error
276 */
277 public int read(byte[] buf, int offset, int numToRead) throws IOException {
278 int totalRead = 0;
279
280 if (entryOffset >= entrySize) {
281 return -1;
282 }
283
284 if ((numToRead + entryOffset) > entrySize) {
285 numToRead = (int) (entrySize - entryOffset);
286 }
287
288 if (readBuf != null) {
289 int sz = (numToRead > readBuf.length) ? readBuf.length
290 : numToRead;
291
292 System.arraycopy(readBuf, 0, buf, offset, sz);
293
294 if (sz >= readBuf.length) {
295 readBuf = null;
296 } else {
297 int newLen = readBuf.length - sz;
298 byte[] newBuf = new byte[newLen];
299
300 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
301
302 readBuf = newBuf;
303 }
304
305 totalRead += sz;
306 numToRead -= sz;
307 offset += sz;
308 }
309
310 while (numToRead > 0) {
311 byte[] rec = buffer.readRecord();
312
313 if (rec == null) {
314 // Unexpected EOF!
315 throw new IOException("unexpected EOF with " + numToRead
316 + " bytes unread");
317 }
318
319 int sz = numToRead;
320 int recLen = rec.length;
321
322 if (recLen > sz) {
323 System.arraycopy(rec, 0, buf, offset, sz);
324
325 readBuf = new byte[recLen - sz];
326
327 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
328 } else {
329 sz = recLen;
330
331 System.arraycopy(rec, 0, buf, offset, recLen);
332 }
333
334 totalRead += sz;
335 numToRead -= sz;
336 offset += sz;
337 }
338
339 entryOffset += totalRead;
340
341 return totalRead;
342 }
343
344 /**
345 * Copies the contents of the current tar archive entry directly into
346 * an output stream.
347 *
348 * @param out The OutputStream into which to write the entry's data.
349 * @throws IOException on error
350 */
351 public void copyEntryContents(OutputStream out) throws IOException {
352 byte[] buf = new byte[LARGE_BUFFER_SIZE];
353
354 while (true) {
355 int numRead = read(buf, 0, buf.length);
356
357 if (numRead == -1) {
358 break;
359 }
360
361 out.write(buf, 0, numRead);
362 }
363 }
364
Stefan Bodewig41f4a202009-03-20 15:42:37 +0000365 protected final TarArchiveEntry getCurrentEntry() {
366 return currEntry;
367 }
368
369 protected final void setCurrentEntry(TarArchiveEntry e) {
370 currEntry = e;
371 }
372
373 protected final boolean isAtEOF() {
374 return hasHitEOF;
375 }
376
377 protected final void setAtEOF(boolean b) {
378 hasHitEOF = b;
379 }
380
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000381 // ArchiveInputStream
382
383 public static boolean matches(byte[] signature, int length) {
Sebastian Bazley8118f822009-04-02 23:34:48 +0000384 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000385 return false;
386 }
387
Sebastian Bazley8118f822009-04-02 23:34:48 +0000388 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
389 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
390 &&
391 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
392 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
393 ){
394 return true;
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000395 }
Sebastian Bazley8118f822009-04-02 23:34:48 +0000396 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
397 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
398 &&
399 (
400 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
401 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
402 ||
403 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
404 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
405 )
406 ){
407 return true;
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000408 }
Sebastian Bazley8118f822009-04-02 23:34:48 +0000409 return false;
Stefan Bodewig0a986c62009-02-12 03:13:10 +0000410 }
411
412}