/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 18 | |
/*
 * This package is based on the work done by Timothy Gerard Endres
 * (time@ice.com) to whom the Ant project is very grateful for his great code.
 */
| 23 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 24 | package org.apache.commons.compress.archivers.tar; |
| 25 | |
import java.io.ByteArrayOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
| 30 | |
| 31 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 32 | * The TarInputStream reads a UNIX tar archive as an InputStream. |
| 33 | * methods are provided to position at each successive entry in |
| 34 | * the archive, and the read each entry as a normal input stream |
| 35 | * using read(). |
| 36 | * |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 37 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 38 | public class TarInputStream extends FilterInputStream { |
| 39 | private static final int SMALL_BUFFER_SIZE = 256; |
| 40 | private static final int BUFFER_SIZE = 8 * 1024; |
| 41 | private static final int LARGE_BUFFER_SIZE = 32 * 1024; |
| 42 | private static final int BYTE_MASK = 0xFF; |
| 43 | |
| 44 | // CheckStyle:VisibilityModifier OFF - bc |
| 45 | protected boolean debug; |
| 46 | protected boolean hasHitEOF; |
| 47 | protected long entrySize; |
| 48 | protected long entryOffset; |
| 49 | protected byte[] readBuf; |
| 50 | protected TarBuffer buffer; |
| 51 | protected TarArchiveEntry currEntry; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 52 | |
| 53 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 54 | * This contents of this array is not used at all in this class, |
| 55 | * it is only here to avoid repreated object creation during calls |
| 56 | * to the no-arg read method. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 57 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 58 | protected byte[] oneBuf; |
| 59 | |
| 60 | // CheckStyle:VisibilityModifier ON |
| 61 | |
| 62 | /** |
| 63 | * Constructor for TarInputStream. |
| 64 | * @param is the input stream to use |
| 65 | */ |
| 66 | public TarInputStream(InputStream is) { |
| 67 | this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 68 | } |
| 69 | |
| 70 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 71 | * Constructor for TarInputStream. |
| 72 | * @param is the input stream to use |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 73 | * @param blockSize the block size to use |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 74 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 75 | public TarInputStream(InputStream is, int blockSize) { |
| 76 | this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 77 | } |
| 78 | |
| 79 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 80 | * Constructor for TarInputStream. |
| 81 | * @param is the input stream to use |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 82 | * @param blockSize the block size to use |
| 83 | * @param recordSize the record size to use |
| 84 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 85 | public TarInputStream(InputStream is, int blockSize, int recordSize) { |
| 86 | super(is); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 87 | |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 88 | this.buffer = new TarBuffer(is, blockSize, recordSize); |
| 89 | this.readBuf = null; |
| 90 | this.oneBuf = new byte[1]; |
| 91 | this.debug = false; |
| 92 | this.hasHitEOF = false; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 93 | } |
| 94 | |
| 95 | /** |
| 96 | * Sets the debugging flag. |
| 97 | * |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 98 | * @param debug True to turn on debugging. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 99 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 100 | public void setDebug(boolean debug) { |
| 101 | this.debug = debug; |
| 102 | buffer.setDebug(debug); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 103 | } |
| 104 | |
| 105 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 106 | * Closes this stream. Calls the TarBuffer's close() method. |
| 107 | * @throws IOException on error |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 108 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 109 | public void close() throws IOException { |
| 110 | buffer.close(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 111 | } |
| 112 | |
| 113 | /** |
| 114 | * Get the record size being used by this stream's TarBuffer. |
| 115 | * |
| 116 | * @return The TarBuffer record size. |
| 117 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 118 | public int getRecordSize() { |
| 119 | return buffer.getRecordSize(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 120 | } |
| 121 | |
| 122 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 123 | * Get the available data that can be read from the current |
| 124 | * entry in the archive. This does not indicate how much data |
| 125 | * is left in the entire archive, only in the current entry. |
| 126 | * This value is determined from the entry's size header field |
| 127 | * and the amount of data already read from the current entry. |
| 128 | * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE |
| 129 | * bytes are left in the current entry in the archive. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 130 | * |
| 131 | * @return The number of available bytes for the current entry. |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 132 | * @throws IOException for signature |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 133 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 134 | public int available() throws IOException { |
| 135 | if (entrySize - entryOffset > Integer.MAX_VALUE) { |
| 136 | return Integer.MAX_VALUE; |
| 137 | } |
| 138 | return (int) (entrySize - entryOffset); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 139 | } |
| 140 | |
| 141 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 142 | * Skip bytes in the input buffer. This skips bytes in the |
| 143 | * current entry's data, not the entire archive, and will |
| 144 | * stop at the end of the current entry's data if the number |
| 145 | * to skip extends beyond that point. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 146 | * |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 147 | * @param numToSkip The number of bytes to skip. |
| 148 | * @return the number actually skipped |
| 149 | * @throws IOException on error |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 150 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 151 | public long skip(long numToSkip) throws IOException { |
| 152 | // REVIEW |
| 153 | // This is horribly inefficient, but it ensures that we |
| 154 | // properly skip over bytes via the TarBuffer... |
| 155 | // |
| 156 | byte[] skipBuf = new byte[BUFFER_SIZE]; |
| 157 | long skip = numToSkip; |
| 158 | while (skip > 0) { |
| 159 | int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip); |
| 160 | int numRead = read(skipBuf, 0, realSkip); |
| 161 | if (numRead == -1) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 162 | break; |
| 163 | } |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 164 | skip -= numRead; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 165 | } |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 166 | return (numToSkip - skip); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 167 | } |
| 168 | |
| 169 | /** |
| 170 | * Since we do not support marking just yet, we return false. |
| 171 | * |
| 172 | * @return False. |
| 173 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 174 | public boolean markSupported() { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 175 | return false; |
| 176 | } |
| 177 | |
| 178 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 179 | * Since we do not support marking just yet, we do nothing. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 180 | * |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 181 | * @param markLimit The limit to mark. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 182 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 183 | public void mark(int markLimit) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 184 | } |
| 185 | |
| 186 | /** |
| 187 | * Since we do not support marking just yet, we do nothing. |
| 188 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 189 | public void reset() { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 190 | } |
| 191 | |
| 192 | /** |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 193 | * Get the next entry in this tar archive. This will skip |
| 194 | * over any remaining data in the current entry, if there |
| 195 | * is one, and place the input stream at the header of the |
| 196 | * next entry, and read the header and instantiate a new |
| 197 | * TarEntry from the header bytes and return that entry. |
| 198 | * If there are no more entries in the archive, null will |
| 199 | * be returned to indicate that the end of the archive has |
| 200 | * been reached. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 201 | * |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 202 | * @return The next TarEntry in the archive, or null. |
| 203 | * @throws IOException on error |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 204 | */ |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 205 | public TarArchiveEntry getNextEntry() throws IOException { |
| 206 | if (hasHitEOF) { |
| 207 | return null; |
| 208 | } |
| 209 | |
| 210 | if (currEntry != null) { |
| 211 | long numToSkip = entrySize - entryOffset; |
| 212 | |
| 213 | if (debug) { |
| 214 | System.err.println("TarInputStream: SKIP currENTRY '" |
| 215 | + currEntry.getName() + "' SZ " |
| 216 | + entrySize + " OFF " |
| 217 | + entryOffset + " skipping " |
| 218 | + numToSkip + " bytes"); |
| 219 | } |
| 220 | |
| 221 | if (numToSkip > 0) { |
| 222 | skip(numToSkip); |
| 223 | } |
| 224 | |
| 225 | readBuf = null; |
| 226 | } |
| 227 | |
| 228 | byte[] headerBuf = buffer.readRecord(); |
| 229 | |
| 230 | if (headerBuf == null) { |
| 231 | if (debug) { |
| 232 | System.err.println("READ NULL RECORD"); |
| 233 | } |
| 234 | hasHitEOF = true; |
| 235 | } else if (buffer.isEOFRecord(headerBuf)) { |
| 236 | if (debug) { |
| 237 | System.err.println("READ EOF RECORD"); |
| 238 | } |
| 239 | hasHitEOF = true; |
| 240 | } |
| 241 | |
| 242 | if (hasHitEOF) { |
| 243 | currEntry = null; |
| 244 | } else { |
| 245 | currEntry = new TarArchiveEntry(headerBuf); |
| 246 | |
| 247 | if (debug) { |
| 248 | System.err.println("TarInputStream: SET CURRENTRY '" |
| 249 | + currEntry.getName() |
| 250 | + "' size = " |
| 251 | + currEntry.getSize()); |
| 252 | } |
| 253 | |
| 254 | entryOffset = 0; |
| 255 | |
| 256 | entrySize = currEntry.getSize(); |
| 257 | } |
| 258 | |
| 259 | if (currEntry != null && currEntry.isGNULongNameEntry()) { |
| 260 | // read in the name |
| 261 | StringBuffer longName = new StringBuffer(); |
| 262 | byte[] buf = new byte[SMALL_BUFFER_SIZE]; |
| 263 | int length = 0; |
| 264 | while ((length = read(buf)) >= 0) { |
| 265 | longName.append(new String(buf, 0, length)); |
| 266 | } |
| 267 | getNextEntry(); |
| 268 | if (currEntry == null) { |
| 269 | // Bugzilla: 40334 |
| 270 | // Malformed tar file - long entry name not followed by entry |
| 271 | return null; |
| 272 | } |
| 273 | // remove trailing null terminator |
| 274 | if (longName.length() > 0 |
| 275 | && longName.charAt(longName.length() - 1) == 0) { |
| 276 | longName.deleteCharAt(longName.length() - 1); |
| 277 | } |
| 278 | currEntry.setName(longName.toString()); |
| 279 | } |
| 280 | |
| 281 | return currEntry; |
| 282 | } |
| 283 | |
| 284 | /** |
| 285 | * Reads a byte from the current tar archive entry. |
| 286 | * |
| 287 | * This method simply calls read( byte[], int, int ). |
| 288 | * |
| 289 | * @return The byte read, or -1 at EOF. |
| 290 | * @throws IOException on error |
| 291 | */ |
| 292 | public int read() throws IOException { |
| 293 | int num = read(oneBuf, 0, 1); |
| 294 | return num == -1 ? -1 : ((int) oneBuf[0]) & BYTE_MASK; |
| 295 | } |
| 296 | |
| 297 | /** |
| 298 | * Reads bytes from the current tar archive entry. |
| 299 | * |
| 300 | * This method is aware of the boundaries of the current |
| 301 | * entry in the archive and will deal with them as if they |
| 302 | * were this stream's start and EOF. |
| 303 | * |
| 304 | * @param buf The buffer into which to place bytes read. |
| 305 | * @param offset The offset at which to place bytes read. |
| 306 | * @param numToRead The number of bytes to read. |
| 307 | * @return The number of bytes read, or -1 at EOF. |
| 308 | * @throws IOException on error |
| 309 | */ |
| 310 | public int read(byte[] buf, int offset, int numToRead) throws IOException { |
| 311 | int totalRead = 0; |
| 312 | |
| 313 | if (entryOffset >= entrySize) { |
| 314 | return -1; |
| 315 | } |
| 316 | |
| 317 | if ((numToRead + entryOffset) > entrySize) { |
| 318 | numToRead = (int) (entrySize - entryOffset); |
| 319 | } |
| 320 | |
| 321 | if (readBuf != null) { |
| 322 | int sz = (numToRead > readBuf.length) ? readBuf.length |
| 323 | : numToRead; |
| 324 | |
| 325 | System.arraycopy(readBuf, 0, buf, offset, sz); |
| 326 | |
| 327 | if (sz >= readBuf.length) { |
| 328 | readBuf = null; |
| 329 | } else { |
| 330 | int newLen = readBuf.length - sz; |
| 331 | byte[] newBuf = new byte[newLen]; |
| 332 | |
| 333 | System.arraycopy(readBuf, sz, newBuf, 0, newLen); |
| 334 | |
| 335 | readBuf = newBuf; |
| 336 | } |
| 337 | |
| 338 | totalRead += sz; |
| 339 | numToRead -= sz; |
| 340 | offset += sz; |
| 341 | } |
| 342 | |
| 343 | while (numToRead > 0) { |
| 344 | byte[] rec = buffer.readRecord(); |
| 345 | |
| 346 | if (rec == null) { |
| 347 | // Unexpected EOF! |
| 348 | throw new IOException("unexpected EOF with " + numToRead |
| 349 | + " bytes unread"); |
| 350 | } |
| 351 | |
| 352 | int sz = numToRead; |
| 353 | int recLen = rec.length; |
| 354 | |
| 355 | if (recLen > sz) { |
| 356 | System.arraycopy(rec, 0, buf, offset, sz); |
| 357 | |
| 358 | readBuf = new byte[recLen - sz]; |
| 359 | |
| 360 | System.arraycopy(rec, sz, readBuf, 0, recLen - sz); |
| 361 | } else { |
| 362 | sz = recLen; |
| 363 | |
| 364 | System.arraycopy(rec, 0, buf, offset, recLen); |
| 365 | } |
| 366 | |
| 367 | totalRead += sz; |
| 368 | numToRead -= sz; |
| 369 | offset += sz; |
| 370 | } |
| 371 | |
| 372 | entryOffset += totalRead; |
| 373 | |
| 374 | return totalRead; |
| 375 | } |
| 376 | |
| 377 | /** |
| 378 | * Copies the contents of the current tar archive entry directly into |
| 379 | * an output stream. |
| 380 | * |
| 381 | * @param out The OutputStream into which to write the entry's data. |
| 382 | * @throws IOException on error |
| 383 | */ |
| 384 | public void copyEntryContents(OutputStream out) throws IOException { |
| 385 | byte[] buf = new byte[LARGE_BUFFER_SIZE]; |
| 386 | |
| 387 | while (true) { |
| 388 | int numRead = read(buf, 0, buf.length); |
| 389 | |
| 390 | if (numRead == -1) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 391 | break; |
| 392 | } |
| 393 | |
Torsten Curdt | 46ad24d | 2009-01-08 11:09:25 +0000 | [diff] [blame^] | 394 | out.write(buf, 0, numRead); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 395 | } |
| 396 | } |
| 397 | } |