/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */
| 23 | |
| 24 | package org.apache.commons.compress.archivers.tar; |
| 25 | |
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 32 | |
| 33 | /** |
| 34 | * The TarInputStream reads a UNIX tar archive as an InputStream. |
| 35 | * methods are provided to position at each successive entry in |
| 36 | * the archive, and the read each entry as a normal input stream |
| 37 | * using read(). |
Sebastian Bazley | 99870ef | 2009-03-28 00:04:36 +0000 | [diff] [blame] | 38 | * @NotThreadSafe |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 39 | */ |
| 40 | public class TarArchiveInputStream extends ArchiveInputStream { |
| 41 | private static final int SMALL_BUFFER_SIZE = 256; |
| 42 | private static final int BUFFER_SIZE = 8 * 1024; |
| 43 | private static final int LARGE_BUFFER_SIZE = 32 * 1024; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 44 | |
Stefan Bodewig | 41f4a20 | 2009-03-20 15:42:37 +0000 | [diff] [blame] | 45 | private boolean debug; |
| 46 | private boolean hasHitEOF; |
| 47 | private long entrySize; |
| 48 | private long entryOffset; |
| 49 | private byte[] readBuf; |
| 50 | protected final TarBuffer buffer; |
| 51 | private TarArchiveEntry currEntry; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 52 | |
| 53 | /** |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 54 | * Constructor for TarInputStream. |
| 55 | * @param is the input stream to use |
| 56 | */ |
| 57 | public TarArchiveInputStream(InputStream is) { |
| 58 | this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); |
| 59 | } |
| 60 | |
| 61 | /** |
| 62 | * Constructor for TarInputStream. |
| 63 | * @param is the input stream to use |
| 64 | * @param blockSize the block size to use |
| 65 | */ |
| 66 | public TarArchiveInputStream(InputStream is, int blockSize) { |
| 67 | this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); |
| 68 | } |
| 69 | |
| 70 | /** |
| 71 | * Constructor for TarInputStream. |
| 72 | * @param is the input stream to use |
| 73 | * @param blockSize the block size to use |
| 74 | * @param recordSize the record size to use |
| 75 | */ |
| 76 | public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) { |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 77 | this.buffer = new TarBuffer(is, blockSize, recordSize); |
| 78 | this.readBuf = null; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 79 | this.debug = false; |
| 80 | this.hasHitEOF = false; |
| 81 | } |
| 82 | |
| 83 | /** |
| 84 | * Sets the debugging flag. |
| 85 | * |
| 86 | * @param debug True to turn on debugging. |
| 87 | */ |
| 88 | public void setDebug(boolean debug) { |
| 89 | this.debug = debug; |
| 90 | buffer.setDebug(debug); |
| 91 | } |
| 92 | |
| 93 | /** |
| 94 | * Closes this stream. Calls the TarBuffer's close() method. |
| 95 | * @throws IOException on error |
| 96 | */ |
| 97 | public void close() throws IOException { |
| 98 | buffer.close(); |
| 99 | } |
| 100 | |
| 101 | /** |
| 102 | * Get the record size being used by this stream's TarBuffer. |
| 103 | * |
| 104 | * @return The TarBuffer record size. |
| 105 | */ |
| 106 | public int getRecordSize() { |
| 107 | return buffer.getRecordSize(); |
| 108 | } |
| 109 | |
| 110 | /** |
| 111 | * Get the available data that can be read from the current |
| 112 | * entry in the archive. This does not indicate how much data |
| 113 | * is left in the entire archive, only in the current entry. |
| 114 | * This value is determined from the entry's size header field |
| 115 | * and the amount of data already read from the current entry. |
| 116 | * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE |
| 117 | * bytes are left in the current entry in the archive. |
| 118 | * |
| 119 | * @return The number of available bytes for the current entry. |
| 120 | * @throws IOException for signature |
| 121 | */ |
| 122 | public int available() throws IOException { |
| 123 | if (entrySize - entryOffset > Integer.MAX_VALUE) { |
| 124 | return Integer.MAX_VALUE; |
| 125 | } |
| 126 | return (int) (entrySize - entryOffset); |
| 127 | } |
| 128 | |
| 129 | /** |
| 130 | * Skip bytes in the input buffer. This skips bytes in the |
| 131 | * current entry's data, not the entire archive, and will |
| 132 | * stop at the end of the current entry's data if the number |
| 133 | * to skip extends beyond that point. |
| 134 | * |
| 135 | * @param numToSkip The number of bytes to skip. |
| 136 | * @return the number actually skipped |
| 137 | * @throws IOException on error |
| 138 | */ |
| 139 | public long skip(long numToSkip) throws IOException { |
| 140 | // REVIEW |
| 141 | // This is horribly inefficient, but it ensures that we |
| 142 | // properly skip over bytes via the TarBuffer... |
| 143 | // |
| 144 | byte[] skipBuf = new byte[BUFFER_SIZE]; |
| 145 | long skip = numToSkip; |
| 146 | while (skip > 0) { |
| 147 | int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip); |
| 148 | int numRead = read(skipBuf, 0, realSkip); |
| 149 | if (numRead == -1) { |
| 150 | break; |
| 151 | } |
| 152 | skip -= numRead; |
| 153 | } |
| 154 | return (numToSkip - skip); |
| 155 | } |
| 156 | |
| 157 | /** |
| 158 | * Since we do not support marking just yet, we do nothing. |
| 159 | */ |
| 160 | public void reset() { |
| 161 | } |
| 162 | |
| 163 | /** |
| 164 | * Get the next entry in this tar archive. This will skip |
| 165 | * over any remaining data in the current entry, if there |
| 166 | * is one, and place the input stream at the header of the |
| 167 | * next entry, and read the header and instantiate a new |
| 168 | * TarEntry from the header bytes and return that entry. |
| 169 | * If there are no more entries in the archive, null will |
| 170 | * be returned to indicate that the end of the archive has |
| 171 | * been reached. |
| 172 | * |
| 173 | * @return The next TarEntry in the archive, or null. |
| 174 | * @throws IOException on error |
| 175 | */ |
| 176 | public TarArchiveEntry getNextTarEntry() throws IOException { |
| 177 | if (hasHitEOF) { |
| 178 | return null; |
| 179 | } |
| 180 | |
| 181 | if (currEntry != null) { |
| 182 | long numToSkip = entrySize - entryOffset; |
| 183 | |
| 184 | if (debug) { |
| 185 | System.err.println("TarInputStream: SKIP currENTRY '" |
| 186 | + currEntry.getName() + "' SZ " |
| 187 | + entrySize + " OFF " |
| 188 | + entryOffset + " skipping " |
| 189 | + numToSkip + " bytes"); |
| 190 | } |
| 191 | |
| 192 | while (numToSkip > 0) { |
| 193 | long skipped = skip(numToSkip); |
| 194 | if (skipped <= 0) { |
| 195 | throw new RuntimeException("failed to skip current tar" |
| 196 | + " entry"); |
| 197 | } |
| 198 | numToSkip -= skipped; |
| 199 | } |
| 200 | |
| 201 | readBuf = null; |
| 202 | } |
| 203 | |
| 204 | byte[] headerBuf = buffer.readRecord(); |
| 205 | |
| 206 | if (headerBuf == null) { |
| 207 | if (debug) { |
| 208 | System.err.println("READ NULL RECORD"); |
| 209 | } |
| 210 | hasHitEOF = true; |
| 211 | } else if (buffer.isEOFRecord(headerBuf)) { |
| 212 | if (debug) { |
| 213 | System.err.println("READ EOF RECORD"); |
| 214 | } |
| 215 | hasHitEOF = true; |
| 216 | } |
| 217 | |
| 218 | if (hasHitEOF) { |
| 219 | currEntry = null; |
| 220 | } else { |
| 221 | currEntry = new TarArchiveEntry(headerBuf); |
| 222 | |
| 223 | if (debug) { |
| 224 | System.err.println("TarInputStream: SET CURRENTRY '" |
| 225 | + currEntry.getName() |
| 226 | + "' size = " |
| 227 | + currEntry.getSize()); |
| 228 | } |
| 229 | |
| 230 | entryOffset = 0; |
| 231 | |
| 232 | entrySize = currEntry.getSize(); |
| 233 | } |
| 234 | |
| 235 | if (currEntry != null && currEntry.isGNULongNameEntry()) { |
| 236 | // read in the name |
| 237 | StringBuffer longName = new StringBuffer(); |
| 238 | byte[] buf = new byte[SMALL_BUFFER_SIZE]; |
| 239 | int length = 0; |
| 240 | while ((length = read(buf)) >= 0) { |
| 241 | longName.append(new String(buf, 0, length)); |
| 242 | } |
| 243 | getNextEntry(); |
| 244 | if (currEntry == null) { |
| 245 | // Bugzilla: 40334 |
| 246 | // Malformed tar file - long entry name not followed by entry |
| 247 | return null; |
| 248 | } |
| 249 | // remove trailing null terminator |
| 250 | if (longName.length() > 0 |
| 251 | && longName.charAt(longName.length() - 1) == 0) { |
| 252 | longName.deleteCharAt(longName.length() - 1); |
| 253 | } |
| 254 | currEntry.setName(longName.toString()); |
| 255 | } |
| 256 | |
| 257 | return currEntry; |
| 258 | } |
| 259 | |
| 260 | public ArchiveEntry getNextEntry() throws IOException { |
| 261 | return getNextTarEntry(); |
| 262 | } |
| 263 | |
| 264 | /** |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 265 | * Reads bytes from the current tar archive entry. |
| 266 | * |
| 267 | * This method is aware of the boundaries of the current |
| 268 | * entry in the archive and will deal with them as if they |
| 269 | * were this stream's start and EOF. |
| 270 | * |
| 271 | * @param buf The buffer into which to place bytes read. |
| 272 | * @param offset The offset at which to place bytes read. |
| 273 | * @param numToRead The number of bytes to read. |
| 274 | * @return The number of bytes read, or -1 at EOF. |
| 275 | * @throws IOException on error |
| 276 | */ |
| 277 | public int read(byte[] buf, int offset, int numToRead) throws IOException { |
| 278 | int totalRead = 0; |
| 279 | |
| 280 | if (entryOffset >= entrySize) { |
| 281 | return -1; |
| 282 | } |
| 283 | |
| 284 | if ((numToRead + entryOffset) > entrySize) { |
| 285 | numToRead = (int) (entrySize - entryOffset); |
| 286 | } |
| 287 | |
| 288 | if (readBuf != null) { |
| 289 | int sz = (numToRead > readBuf.length) ? readBuf.length |
| 290 | : numToRead; |
| 291 | |
| 292 | System.arraycopy(readBuf, 0, buf, offset, sz); |
| 293 | |
| 294 | if (sz >= readBuf.length) { |
| 295 | readBuf = null; |
| 296 | } else { |
| 297 | int newLen = readBuf.length - sz; |
| 298 | byte[] newBuf = new byte[newLen]; |
| 299 | |
| 300 | System.arraycopy(readBuf, sz, newBuf, 0, newLen); |
| 301 | |
| 302 | readBuf = newBuf; |
| 303 | } |
| 304 | |
| 305 | totalRead += sz; |
| 306 | numToRead -= sz; |
| 307 | offset += sz; |
| 308 | } |
| 309 | |
| 310 | while (numToRead > 0) { |
| 311 | byte[] rec = buffer.readRecord(); |
| 312 | |
| 313 | if (rec == null) { |
| 314 | // Unexpected EOF! |
| 315 | throw new IOException("unexpected EOF with " + numToRead |
| 316 | + " bytes unread"); |
| 317 | } |
| 318 | |
| 319 | int sz = numToRead; |
| 320 | int recLen = rec.length; |
| 321 | |
| 322 | if (recLen > sz) { |
| 323 | System.arraycopy(rec, 0, buf, offset, sz); |
| 324 | |
| 325 | readBuf = new byte[recLen - sz]; |
| 326 | |
| 327 | System.arraycopy(rec, sz, readBuf, 0, recLen - sz); |
| 328 | } else { |
| 329 | sz = recLen; |
| 330 | |
| 331 | System.arraycopy(rec, 0, buf, offset, recLen); |
| 332 | } |
| 333 | |
| 334 | totalRead += sz; |
| 335 | numToRead -= sz; |
| 336 | offset += sz; |
| 337 | } |
| 338 | |
| 339 | entryOffset += totalRead; |
| 340 | |
| 341 | return totalRead; |
| 342 | } |
| 343 | |
| 344 | /** |
| 345 | * Copies the contents of the current tar archive entry directly into |
| 346 | * an output stream. |
| 347 | * |
| 348 | * @param out The OutputStream into which to write the entry's data. |
| 349 | * @throws IOException on error |
| 350 | */ |
| 351 | public void copyEntryContents(OutputStream out) throws IOException { |
| 352 | byte[] buf = new byte[LARGE_BUFFER_SIZE]; |
| 353 | |
| 354 | while (true) { |
| 355 | int numRead = read(buf, 0, buf.length); |
| 356 | |
| 357 | if (numRead == -1) { |
| 358 | break; |
| 359 | } |
| 360 | |
| 361 | out.write(buf, 0, numRead); |
| 362 | } |
| 363 | } |
| 364 | |
Stefan Bodewig | 41f4a20 | 2009-03-20 15:42:37 +0000 | [diff] [blame] | 365 | protected final TarArchiveEntry getCurrentEntry() { |
| 366 | return currEntry; |
| 367 | } |
| 368 | |
| 369 | protected final void setCurrentEntry(TarArchiveEntry e) { |
| 370 | currEntry = e; |
| 371 | } |
| 372 | |
| 373 | protected final boolean isAtEOF() { |
| 374 | return hasHitEOF; |
| 375 | } |
| 376 | |
| 377 | protected final void setAtEOF(boolean b) { |
| 378 | hasHitEOF = b; |
| 379 | } |
| 380 | |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 381 | // ArchiveInputStream |
| 382 | |
| 383 | public static boolean matches(byte[] signature, int length) { |
Sebastian Bazley | 8118f82 | 2009-04-02 23:34:48 +0000 | [diff] [blame^] | 384 | if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) { |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 385 | return false; |
| 386 | } |
| 387 | |
Sebastian Bazley | 8118f82 | 2009-04-02 23:34:48 +0000 | [diff] [blame^] | 388 | if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX, |
| 389 | signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) |
| 390 | && |
| 391 | ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX, |
| 392 | signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| 393 | ){ |
| 394 | return true; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 395 | } |
Sebastian Bazley | 8118f82 | 2009-04-02 23:34:48 +0000 | [diff] [blame^] | 396 | if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU, |
| 397 | signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN) |
| 398 | && |
| 399 | ( |
| 400 | ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE, |
| 401 | signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| 402 | || |
| 403 | ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO, |
| 404 | signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN) |
| 405 | ) |
| 406 | ){ |
| 407 | return true; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 408 | } |
Sebastian Bazley | 8118f82 | 2009-04-02 23:34:48 +0000 | [diff] [blame^] | 409 | return false; |
Stefan Bodewig | 0a986c6 | 2009-02-12 03:13:10 +0000 | [diff] [blame] | 410 | } |
| 411 | |
| 412 | } |