blob: 5625c14b021e9f335bb16c5575f58c24950ff828 [file] [log] [blame]
Torsten Curdtca165392008-07-10 10:17:44 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19package org.apache.commons.compress.archivers.zip;
20
Stefan Bodewig03e94a42010-03-19 15:08:23 +000021import java.io.ByteArrayInputStream;
22import java.io.ByteArrayOutputStream;
Stefan Bodewig008ca942009-03-26 22:29:59 +000023import java.io.EOFException;
Torsten Curdtca165392008-07-10 10:17:44 +000024import java.io.IOException;
25import java.io.InputStream;
Stefan Bodewig008ca942009-03-26 22:29:59 +000026import java.io.PushbackInputStream;
Emmanuel Bourg29f975e2013-12-18 23:14:26 +000027import java.nio.ByteBuffer;
Stefan Bodewig008ca942009-03-26 22:29:59 +000028import java.util.zip.CRC32;
29import java.util.zip.DataFormatException;
30import java.util.zip.Inflater;
Gary D. Gregory04173632012-04-01 13:26:55 +000031import java.util.zip.ZipEntry;
Stefan Bodewig008ca942009-03-26 22:29:59 +000032import java.util.zip.ZipException;
Torsten Curdtca165392008-07-10 10:17:44 +000033
34import org.apache.commons.compress.archivers.ArchiveEntry;
35import org.apache.commons.compress.archivers.ArchiveInputStream;
Stefan Bodewig3685eea2013-10-04 13:17:32 +000036import org.apache.commons.compress.utils.IOUtils;
Torsten Curdtca165392008-07-10 10:17:44 +000037
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +000038import static org.apache.commons.compress.archivers.zip.ZipConstants.DWORD;
Stefan Bodewige53e88a2011-07-25 13:28:31 +000039import static org.apache.commons.compress.archivers.zip.ZipConstants.SHORT;
40import static org.apache.commons.compress.archivers.zip.ZipConstants.WORD;
Stefan Bodewigcadc5272011-08-03 14:33:44 +000041import static org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
Stefan Bodewige53e88a2011-07-25 13:28:31 +000042
Sebastian Bazley99870ef2009-03-28 00:04:36 +000043/**
Sebastian Bazleyf7f6b182009-03-31 10:36:25 +000044 * Implements an input stream that can read Zip archives.
Stefan Bodewig794c20f2011-08-11 14:08:54 +000045 *
Stefan Bodewig794c20f2011-08-11 14:08:54 +000046 * <p>As of Apache Commons Compress it transparently supports Zip64
47 * extensions and thus individual entries and archives larger than 4
48 * GB or with more than 65536 entries.</p>
49 *
Stefan Bodewig171c3ac2015-08-09 16:06:09 +000050 * <p>The {@link ZipFile} class is preferred when reading from files
51 * as {@link ZipArchiveInputStream} is limited by not being able to
52 * read the central directory header before returning entries. In
53 * particular {@link ZipArchiveInputStream}</p>
54 *
55 * <ul>
56 *
57 * <li>may return entries that are not part of the central directory
58 * at all and shouldn't be considered part of the archive.</li>
59 *
60 * <li>may return several entries with the same name.</li>
61 *
62 * <li>will not return internal or external attributes.</li>
63 *
64 * <li>may return incomplete extra field data.</li>
65 *
66 * <li>may return unknown sizes and CRC values for entries until the
67 * next entry has been reached if the archive uses the data
68 * descriptor feature.</li>
69 *
70 * </ul>
71 *
Sebastian Bazleyf7f6b182009-03-31 10:36:25 +000072 * @see ZipFile
Sebastian Bazley99870ef2009-03-28 00:04:36 +000073 * @NotThreadSafe
74 */
Torsten Curdtca165392008-07-10 10:17:44 +000075public class ZipArchiveInputStream extends ArchiveInputStream {
76
Emmanuel Bourg429462a2013-12-19 09:53:17 +000077 /** The zip encoding to use for filenames and the file comment. */
Stefan Bodewig008ca942009-03-26 22:29:59 +000078 private final ZipEncoding zipEncoding;
79
Sebastian Bazleyd50feb62015-02-16 23:13:01 +000080 // the provided encoding (for unit tests)
81 final String encoding;
82
Emmanuel Bourg429462a2013-12-19 09:53:17 +000083 /** Whether to look for and use Unicode extra fields. */
Stefan Bodewigf84dd362009-04-28 08:31:15 +000084 private final boolean useUnicodeExtraFields;
Stefan Bodewig008ca942009-03-26 22:29:59 +000085
Emmanuel Bourg429462a2013-12-19 09:53:17 +000086 /** Wrapped stream, will always be a PushbackInputStream. */
Stefan Bodewig008ca942009-03-26 22:29:59 +000087 private final InputStream in;
88
Emmanuel Bourg429462a2013-12-19 09:53:17 +000089 /** Inflater used for all deflated entries. */
Stefan Bodewig008ca942009-03-26 22:29:59 +000090 private final Inflater inf = new Inflater(true);
Stefan Bodewig04e132b2011-08-03 13:08:33 +000091
Emmanuel Bourg429462a2013-12-19 09:53:17 +000092 /** Buffer used to read from the wrapped stream. */
Emmanuel Bourg29f975e2013-12-18 23:14:26 +000093 private final ByteBuffer buf = ByteBuffer.allocate(ZipArchiveOutputStream.BUFFER_SIZE);
Emmanuel Bourg429462a2013-12-19 09:53:17 +000094
95 /** The entry that is currently being read. */
Stefan Bodewig04e132b2011-08-03 13:08:33 +000096 private CurrentEntry current = null;
Emmanuel Bourg429462a2013-12-19 09:53:17 +000097
98 /** Whether the stream has been closed. */
Stefan Bodewig008ca942009-03-26 22:29:59 +000099 private boolean closed = false;
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000100
101 /** Whether the stream has reached the central directory - and thus found all entries. */
Stefan Bodewig008ca942009-03-26 22:29:59 +0000102 private boolean hitCentralDirectory = false;
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000103
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000104 /**
105 * When reading a stored entry that uses the data descriptor this
106 * stream has to read the full entry and caches it. This is the
107 * cache.
108 */
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000109 private ByteArrayInputStream lastStoredEntry = null;
110
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000111 /** Whether the stream will try to read STORED entries that use a data descriptor. */
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000112 private boolean allowStoredEntriesWithDataDescriptor = false;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000113
114 private static final int LFH_LEN = 30;
115 /*
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000116 local file header signature WORD
117 version needed to extract SHORT
118 general purpose bit flag SHORT
119 compression method SHORT
120 last mod file time SHORT
121 last mod file date SHORT
122 crc-32 WORD
123 compressed size WORD
124 uncompressed size WORD
125 file name length SHORT
126 extra field length SHORT
Stefan Bodewig008ca942009-03-26 22:29:59 +0000127 */
Torsten Curdtca165392008-07-10 10:17:44 +0000128
Stefan Bodewig7181d542013-01-22 12:45:24 +0000129 private static final int CFH_LEN = 46;
130 /*
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000131 central file header signature WORD
132 version made by SHORT
133 version needed to extract SHORT
134 general purpose bit flag SHORT
135 compression method SHORT
136 last mod file time SHORT
137 last mod file date SHORT
138 crc-32 WORD
139 compressed size WORD
140 uncompressed size WORD
141 file name length SHORT
142 extra field length SHORT
143 file comment length SHORT
144 disk number start SHORT
145 internal file attributes SHORT
146 external file attributes WORD
147 relative offset of local header WORD
Stefan Bodewig7181d542013-01-22 12:45:24 +0000148 */
149
Stefan Bodewigcadc5272011-08-03 14:33:44 +0000150 private static final long TWO_EXP_32 = ZIP64_MAGIC + 1;
151
Sebastian Bazleya6072ce2013-01-07 22:54:12 +0000152 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000153 private final byte[] LFH_BUF = new byte[LFH_LEN];
154 private final byte[] SKIP_BUF = new byte[1024];
Stefan Bodewig7181d542013-01-22 12:45:24 +0000155 private final byte[] SHORT_BUF = new byte[SHORT];
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000156 private final byte[] WORD_BUF = new byte[WORD];
157 private final byte[] TWO_DWORD_BUF = new byte[2 * DWORD];
158
Stefan Bodewig7181d542013-01-22 12:45:24 +0000159 private int entriesRead = 0;
160
/**
 * Creates a ZIP input stream that uses UTF-8 to decode file names.
 *
 * <p>Delegates to the two-argument constructor, passing
 * {@code ZipEncodingHelper.UTF8} as the encoding.</p>
 *
 * @param inputStream the stream to wrap
 */
public ZipArchiveInputStream(final InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8);
}
168
/**
 * Creates a ZIP input stream with the given file name encoding.
 *
 * <p>Delegates to the three-argument constructor with
 * {@code useUnicodeExtraFields} set to {@code true}.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @since 1.5
 */
public ZipArchiveInputStream(final InputStream inputStream, final String encoding) {
    this(inputStream, encoding, true);
}
177
/**
 * Creates a ZIP input stream with the given file name encoding and
 * Unicode extra field handling.
 *
 * <p>Delegates to the four-argument constructor with
 * {@code allowStoredEntriesWithDataDescriptor} set to {@code false}.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(final InputStream inputStream,
                             final String encoding,
                             final boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}
187
/**
 * Creates a ZIP input stream with full control over name decoding and
 * the handling of STORED entries that use a data descriptor.
 *
 * <p>The given stream is wrapped in a {@link PushbackInputStream} whose
 * pushback capacity equals the capacity of the internal read buffer.</p>
 *
 * @param inputStream the stream to wrap
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since 1.1
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields,
                             boolean allowStoredEntriesWithDataDescriptor) {
    this.encoding = encoding;
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.allowStoredEntriesWithDataDescriptor = allowStoredEntriesWithDataDescriptor;

    // nothing has been read so far, so the buffer must look empty
    buf.limit(0);
    this.in = new PushbackInputStream(inputStream, buf.capacity());
}
Torsten Curdtca165392008-07-10 10:17:44 +0000210
Stefan Bodewiga7049ab2009-02-11 07:44:00 +0000211 public ZipArchiveEntry getNextZipEntry() throws IOException {
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000212 boolean firstEntry = true;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000213 if (closed || hitCentralDirectory) {
Stefan Bodewigfa8fea72009-02-06 08:49:49 +0000214 return null;
215 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000216 if (current != null) {
217 closeEntry();
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000218 firstEntry = false;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000219 }
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000220
Stefan Bodewig008ca942009-03-26 22:29:59 +0000221 try {
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000222 if (firstEntry) {
223 // split archives have a special signature before the
224 // first local file header - look for it and fail with
225 // the appropriate error message if this is a split
226 // archive.
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000227 readFirstLocalFileHeader(LFH_BUF);
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000228 } else {
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000229 readFully(LFH_BUF);
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000230 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000231 } catch (EOFException e) {
232 return null;
233 }
Sebastian Bazley2f69e632013-01-22 17:00:33 +0000234
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000235 ZipLong sig = new ZipLong(LFH_BUF);
Stefan Bodewig7181d542013-01-22 12:45:24 +0000236 if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000237 hitCentralDirectory = true;
Stefan Bodewig7181d542013-01-22 12:45:24 +0000238 skipRemainderOfArchive();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000239 }
240 if (!sig.equals(ZipLong.LFH_SIG)) {
241 return null;
242 }
243
244 int off = WORD;
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000245 current = new CurrentEntry();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000246
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000247 int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000248 off += SHORT;
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000249 current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile.NIBLET_MASK);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000250
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000251 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(LFH_BUF, off);
Stefan Bodewig4d68dda2010-02-19 09:56:37 +0000252 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000253 final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000254 current.hasDataDescriptor = gpFlag.usesDataDescriptor();
255 current.entry.setGeneralPurposeBit(gpFlag);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000256
257 off += SHORT;
258
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000259 current.entry.setMethod(ZipShort.getValue(LFH_BUF, off));
Stefan Bodewig008ca942009-03-26 22:29:59 +0000260 off += SHORT;
261
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000262 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(LFH_BUF, off));
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000263 current.entry.setTime(time);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000264 off += WORD;
265
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000266 ZipLong size = null, cSize = null;
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000267 if (!current.hasDataDescriptor) {
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000268 current.entry.setCrc(ZipLong.getValue(LFH_BUF, off));
Stefan Bodewig008ca942009-03-26 22:29:59 +0000269 off += WORD;
270
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000271 cSize = new ZipLong(LFH_BUF, off);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000272 off += WORD;
273
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000274 size = new ZipLong(LFH_BUF, off);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000275 off += WORD;
276 } else {
277 off += 3 * WORD;
278 }
279
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000280 int fileNameLen = ZipShort.getValue(LFH_BUF, off);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000281
282 off += SHORT;
283
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000284 int extraLen = ZipShort.getValue(LFH_BUF, off);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000285 off += SHORT;
286
287 byte[] fileName = new byte[fileNameLen];
288 readFully(fileName);
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000289 current.entry.setName(entryEncoding.decode(fileName), fileName);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000290
291 byte[] extraData = new byte[extraLen];
292 readFully(extraData);
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000293 current.entry.setExtra(extraData);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000294
Stefan Bodewig2b209932010-02-19 08:41:23 +0000295 if (!hasUTF8Flag && useUnicodeExtraFields) {
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000296 ZipUtil.setNameAndCommentFromExtraFields(current.entry, fileName, null);
Stefan Bodewigf84dd362009-04-28 08:31:15 +0000297 }
Stefan Bodewig3879e472011-07-26 04:38:35 +0000298
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000299 processZip64Extra(size, cSize);
Emmanuel Bourgf9487ac2013-12-19 16:09:54 +0000300
Stefan Bodewigdeeb3552015-01-09 17:58:15 +0000301 if (current.entry.getCompressedSize() != ZipArchiveEntry.SIZE_UNKNOWN) {
Emmanuel Bourgf9487ac2013-12-19 16:09:54 +0000302 if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()) {
303 current.in = new UnshrinkingInputStream(new BoundedInputStream(in, current.entry.getCompressedSize()));
304 } else if (current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
305 current.in = new ExplodingInputStream(
306 current.entry.getGeneralPurposeBit().getSlidingDictionarySize(),
307 current.entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
308 new BoundedInputStream(in, current.entry.getCompressedSize()));
309 }
Emmanuel Bourg1b47ffe2013-12-19 12:30:01 +0000310 }
311
Stefan Bodewig7181d542013-01-22 12:45:24 +0000312 entriesRead++;
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000313 return current.entry;
314 }
315
316 /**
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000317 * Fills the given array with the first local file header and
318 * deals with splitting/spanning markers that may prefix the first
319 * LFH.
320 */
321 private void readFirstLocalFileHeader(byte[] lfh) throws IOException {
322 readFully(lfh);
323 ZipLong sig = new ZipLong(lfh);
324 if (sig.equals(ZipLong.DD_SIG)) {
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000325 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.SPLITTING);
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000326 }
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000327
Stefan Bodewige10ce2c2013-01-01 10:57:23 +0000328 if (sig.equals(ZipLong.SINGLE_SEGMENT_SPLIT_MARKER)) {
329 // The archive is not really split as only one segment was
330 // needed in the end. Just skip over the marker.
331 byte[] missedLfhBytes = new byte[4];
332 readFully(missedLfhBytes);
333 System.arraycopy(lfh, 4, lfh, 0, LFH_LEN - 4);
334 System.arraycopy(missedLfhBytes, 0, lfh, LFH_LEN - 4, 4);
335 }
Stefan Bodewig92d8c572013-01-01 10:51:42 +0000336 }
337
338 /**
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000339 * Records whether a Zip64 extra is present and sets the size
340 * information from it if sizes are 0xFFFFFFFF and the entry
341 * doesn't use a data descriptor.
342 */
343 private void processZip64Extra(ZipLong size, ZipLong cSize) {
344 Zip64ExtendedInformationExtraField z64 =
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000345 (Zip64ExtendedInformationExtraField)
346 current.entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000347 current.usesZip64 = z64 != null;
348 if (!current.hasDataDescriptor) {
Sebastian Bazleydf8b3b22013-05-14 00:19:04 +0000349 if (z64 != null // same as current.usesZip64 but avoids NPE warning
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000350 && (cSize.equals(ZipLong.ZIP64_MAGIC) || size.equals(ZipLong.ZIP64_MAGIC)) ) {
351 current.entry.setCompressedSize(z64.getCompressedSize().getLongValue());
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000352 current.entry.setSize(z64.getSize().getLongValue());
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000353 } else {
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000354 current.entry.setCompressedSize(cSize.getValue());
355 current.entry.setSize(size.getValue());
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000356 }
357 }
Stefan Bodewiga7049ab2009-02-11 07:44:00 +0000358 }
359
Stefan Bodewige53e88a2011-07-25 13:28:31 +0000360 @Override
Stefan Bodewiga7049ab2009-02-11 07:44:00 +0000361 public ArchiveEntry getNextEntry() throws IOException {
362 return getNextZipEntry();
Torsten Curdtca165392008-07-10 10:17:44 +0000363 }
364
Stefan Bodewigc7e51ed2010-02-19 10:55:27 +0000365 /**
366 * Whether this class is able to read the given entry.
367 *
368 * <p>May return false if it is set up to use encryption or a
369 * compression method that hasn't been implemented yet.</p>
Gary D. Gregory2bd0dd42012-04-01 13:02:39 +0000370 * @since 1.1
Stefan Bodewigc7e51ed2010-02-19 10:55:27 +0000371 */
Stefan Bodewige53e88a2011-07-25 13:28:31 +0000372 @Override
Stefan Bodewige0692ec2010-02-24 15:35:47 +0000373 public boolean canReadEntryData(ArchiveEntry ae) {
Stefan Bodewiga33505b2010-02-19 12:23:27 +0000374 if (ae instanceof ZipArchiveEntry) {
Stefan Bodewig02e2be62010-03-17 15:55:00 +0000375 ZipArchiveEntry ze = (ZipArchiveEntry) ae;
376 return ZipUtil.canHandleEntryData(ze)
377 && supportsDataDescriptorFor(ze);
378
Stefan Bodewiga33505b2010-02-19 12:23:27 +0000379 }
Stefan Bodewig734e6f72010-02-19 12:33:28 +0000380 return false;
Stefan Bodewigc7e51ed2010-02-19 10:55:27 +0000381 }
382
Stefan Bodewige53e88a2011-07-25 13:28:31 +0000383 @Override
Emmanuel Bourgc6c7ec42013-12-19 00:31:14 +0000384 public int read(byte[] buffer, int offset, int length) throws IOException {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000385 if (closed) {
386 throw new IOException("The stream is closed");
387 }
Emmanuel Bourgbfc8e032013-12-19 10:01:06 +0000388
389 if (current == null) {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000390 return -1;
391 }
392
393 // avoid int overflow, check null buffer
Emmanuel Bourg8bfb8982013-12-19 08:51:47 +0000394 if (offset > buffer.length || length < 0 || offset < 0 || buffer.length - offset < length) {
395 throw new ArrayIndexOutOfBoundsException();
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000396 }
Emmanuel Bourg8bfb8982013-12-19 08:51:47 +0000397
398 ZipUtil.checkRequestedFeatures(current.entry);
399 if (!supportsDataDescriptorFor(current.entry)) {
400 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException.Feature.DATA_DESCRIPTOR,
401 current.entry);
402 }
403
404 int read;
405 if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
406 read = readStored(buffer, offset, length);
407 } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
408 read = readDeflated(buffer, offset, length);
Emmanuel Bourgf9487ac2013-12-19 16:09:54 +0000409 } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
410 || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()) {
Emmanuel Bourg1b47ffe2013-12-19 12:30:01 +0000411 read = current.in.read(buffer, offset, length);
Emmanuel Bourg8bfb8982013-12-19 08:51:47 +0000412 } else {
413 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()),
414 current.entry);
415 }
416
417 if (read >= 0) {
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000418 current.crc.update(buffer, offset, read);
Emmanuel Bourg8bfb8982013-12-19 08:51:47 +0000419 }
420
421 return read;
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000422 }
423
424 /**
425 * Implementation of read for STORED entries.
426 */
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000427 private int readStored(byte[] buffer, int offset, int length) throws IOException {
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000428
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000429 if (current.hasDataDescriptor) {
430 if (lastStoredEntry == null) {
431 readStoredEntry();
432 }
Emmanuel Bourgc6c7ec42013-12-19 00:31:14 +0000433 return lastStoredEntry.read(buffer, offset, length);
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000434 }
435
436 long csize = current.entry.getSize();
437 if (current.bytesRead >= csize) {
438 return -1;
439 }
440
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000441 if (buf.position() >= buf.limit()) {
442 buf.position(0);
443 int l = in.read(buf.array());
444 if (l == -1) {
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000445 return -1;
446 }
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000447 buf.limit(l);
448
Stefan Bodewig26210c62014-02-21 14:48:33 +0000449 count(l);
450 current.bytesReadFromStream += l;
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000451 }
452
Emmanuel Bourg8301ee72013-12-18 22:10:19 +0000453 int toRead = Math.min(buf.remaining(), length);
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000454 if ((csize - current.bytesRead) < toRead) {
455 // if it is smaller than toRead then it fits into an int
456 toRead = (int) (csize - current.bytesRead);
457 }
Emmanuel Bourgc6c7ec42013-12-19 00:31:14 +0000458 buf.get(buffer, offset, toRead);
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000459 current.bytesRead += toRead;
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000460 return toRead;
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000461 }
Jukka Zitting7b125a32009-12-13 18:31:55 +0000462
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000463 /**
464 * Implementation of read for DEFLATED entries.
465 */
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000466 private int readDeflated(byte[] buffer, int offset, int length) throws IOException {
Emmanuel Bourgc6c7ec42013-12-19 00:31:14 +0000467 int read = readFromInflater(buffer, offset, length);
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000468 if (read <= 0) {
Stefan Bodewigb4a985f2012-12-28 17:29:53 +0000469 if (inf.finished()) {
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000470 return -1;
Stefan Bodewigb4a985f2012-12-28 17:29:53 +0000471 } else if (inf.needsDictionary()) {
472 throw new ZipException("This archive needs a preset dictionary"
473 + " which is not supported by Commons"
474 + " Compress.");
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000475 } else if (read == -1) {
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000476 throw new IOException("Truncated ZIP file");
Stefan Bodewig008ca942009-03-26 22:29:59 +0000477 }
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000478 }
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000479 return read;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000480 }
481
Stefan Bodewige2cf4122012-12-28 06:53:21 +0000482 /**
483 * Potentially reads more bytes to fill the inflater's buffer and
484 * reads from it.
485 */
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000486 private int readFromInflater(byte[] buffer, int offset, int length) throws IOException {
Stefan Bodewige2cf4122012-12-28 06:53:21 +0000487 int read = 0;
488 do {
489 if (inf.needsInput()) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000490 int l = fill();
491 if (l > 0) {
492 current.bytesReadFromStream += buf.limit();
493 } else if (l == -1) {
494 return -1;
Stefan Bodewige2cf4122012-12-28 06:53:21 +0000495 } else {
496 break;
497 }
498 }
499 try {
Emmanuel Bourgc6c7ec42013-12-19 00:31:14 +0000500 read = inf.inflate(buffer, offset, length);
Stefan Bodewige2cf4122012-12-28 06:53:21 +0000501 } catch (DataFormatException e) {
Emmanuel Bourgc83c4622013-12-18 23:19:56 +0000502 throw (IOException) new ZipException(e.getMessage()).initCause(e);
Stefan Bodewige2cf4122012-12-28 06:53:21 +0000503 }
504 } while (read == 0 && inf.needsInput());
505 return read;
506 }
507
Stefan Bodewige53e88a2011-07-25 13:28:31 +0000508 @Override
Stefan Bodewig008ca942009-03-26 22:29:59 +0000509 public void close() throws IOException {
510 if (!closed) {
511 closed = true;
512 in.close();
Stefan Bodewigd48f1062011-08-03 13:36:01 +0000513 inf.end();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000514 }
Torsten Curdtca165392008-07-10 10:17:44 +0000515 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000516
Stefan Bodewig7a76c472011-07-20 14:55:50 +0000517 /**
518 * Skips over and discards value bytes of data from this input
519 * stream.
520 *
521 * <p>This implementation may end up skipping over some smaller
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000522 * number of bytes, possibly 0, if and only if it reaches the end
Stefan Bodewig7a76c472011-07-20 14:55:50 +0000523 * of the underlying stream.</p>
524 *
525 * <p>The actual number of bytes skipped is returned.</p>
526 *
527 * @param value the number of bytes to be skipped.
528 * @return the actual number of bytes skipped.
529 * @throws IOException - if an I/O error occurs.
530 * @throws IllegalArgumentException - if value is negative.
531 */
Stefan Bodewige53e88a2011-07-25 13:28:31 +0000532 @Override
Stefan Bodewig008ca942009-03-26 22:29:59 +0000533 public long skip(long value) throws IOException {
534 if (value >= 0) {
535 long skipped = 0;
Stefan Bodewig7a76c472011-07-20 14:55:50 +0000536 while (skipped < value) {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000537 long rem = value - skipped;
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000538 int x = read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
Stefan Bodewig008ca942009-03-26 22:29:59 +0000539 if (x == -1) {
540 return skipped;
541 }
542 skipped += x;
543 }
544 return skipped;
545 }
546 throw new IllegalArgumentException();
547 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000548
Sebastian Bazley6209f812010-05-10 17:36:40 +0000549 /**
550 * Checks if the signature matches what is expected for a zip file.
551 * Does not currently handle self-extracting zips which may have arbitrary
552 * leading content.
Sebastian Bazley2f69e632013-01-22 17:00:33 +0000553 *
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000554 * @param signature the bytes to check
555 * @param length the number of bytes to check
Sebastian Bazley6209f812010-05-10 17:36:40 +0000556 * @return true, if this stream is a zip archive stream, false otherwise
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000557 */
Stefan Bodewigeadbe112009-02-26 09:31:43 +0000558 public static boolean matches(byte[] signature, int length) {
559 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
Stefan Bodewigdff90012009-02-06 08:59:14 +0000560 return false;
561 }
562
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000563 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
Stefan Bodewig0d6defe2013-01-01 11:05:48 +0000564 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG) // empty zip
565 || checksig(signature, ZipArchiveOutputStream.DD_SIG) // split zip
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000566 || checksig(signature, ZipLong.SINGLE_SEGMENT_SPLIT_MARKER.getBytes());
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000567 }
568
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000569 private static boolean checksig(byte[] signature, byte[] expected) {
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000570 for (int i = 0; i < expected.length; i++) {
571 if (signature[i] != expected[i]) {
Stefan Bodewigeadbe112009-02-26 09:31:43 +0000572 return false;
573 }
Stefan Bodewigfa8fea72009-02-06 08:49:49 +0000574 }
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000575 return true;
Torsten Curdtca165392008-07-10 10:17:44 +0000576 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000577
Jukka Zittingb6886eb2009-12-13 19:44:37 +0000578 /**
579 * Closes the current ZIP archive entry and positions the underlying
580 * stream to the beginning of the next entry. All per-entry variables
581 * and data structures are cleared.
582 * <p>
583 * If the compressed size of this entry is included in the entry header,
584 * then any outstanding bytes are simply skipped from the underlying
585 * stream without uncompressing them. This allows an entry to be safely
586 * closed even if the compression method is unsupported.
587 * <p>
588 * In case we don't know the compressed size of this entry or have
589 * already buffered too much data from the underlying stream to support
590 * uncompression, then the uncompression process is completed and the
591 * end position of the stream is adjusted based on the result of that
592 * process.
593 *
594 * @throws IOException if an error occurs
595 */
Stefan Bodewig008ca942009-03-26 22:29:59 +0000596 private void closeEntry() throws IOException {
597 if (closed) {
598 throw new IOException("The stream is closed");
599 }
600 if (current == null) {
601 return;
602 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000603
Jukka Zittingb6886eb2009-12-13 19:44:37 +0000604 // Ensure all entry bytes are read
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000605 if (current.bytesReadFromStream <= current.entry.getCompressedSize()
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000606 && !current.hasDataDescriptor) {
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000607 drainCurrentEntryData();
Jukka Zittingb6886eb2009-12-13 19:44:37 +0000608 } else {
609 skip(Long.MAX_VALUE);
610
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000611 long inB = current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
612 ? getBytesInflated() : current.bytesRead;
Stefan Bodewig797d74f2011-07-21 03:49:35 +0000613
614 // this is at most a single read() operation and can't
615 // exceed the range of int
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000616 int diff = (int) (current.bytesReadFromStream - inB);
Jukka Zittingb6886eb2009-12-13 19:44:37 +0000617
618 // Pushback any required bytes
Stefan Bodewig797d74f2011-07-21 03:49:35 +0000619 if (diff > 0) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000620 pushback(buf.array(), buf.limit() - diff, diff);
Jukka Zittingb6886eb2009-12-13 19:44:37 +0000621 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000622 }
623
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000624 if (lastStoredEntry == null && current.hasDataDescriptor) {
Stefan Bodewig1154e7b2010-03-17 15:47:57 +0000625 readDataDescriptor();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000626 }
627
628 inf.reset();
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000629 buf.clear().flip();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000630 current = null;
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000631 lastStoredEntry = null;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000632 }
633
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000634 /**
635 * Read all data of the current entry from the underlying stream
636 * that hasn't been read, yet.
637 */
638 private void drainCurrentEntryData() throws IOException {
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000639 long remaining = current.entry.getCompressedSize() - current.bytesReadFromStream;
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000640 while (remaining > 0) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000641 long n = in.read(buf.array(), 0, (int) Math.min(buf.capacity(), remaining));
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000642 if (n < 0) {
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000643 throw new EOFException("Truncated ZIP entry: " + current.entry.getName());
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000644 } else {
645 count(n);
646 remaining -= n;
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000647 }
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000648 }
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000649 }
650
651 /**
652 * Get the number of bytes Inflater has actually processed.
653 *
654 * <p>for Java &lt; Java7 the getBytes* methods in
655 * Inflater/Deflater seem to return unsigned ints rather than
656 * longs that start over with 0 at 2^32.</p>
657 *
658 * <p>The stream knows how many bytes it has read, but not how
659 * many the Inflater actually consumed - it should be between the
660 * total number of bytes read for the entry and the total number
661 * minus the last read operation. Here we just try to make the
662 * value close enough to the bytes we've read by assuming the
663 * number of bytes consumed must be smaller than (or equal to) the
664 * number of bytes read but not smaller by more than 2^32.</p>
665 */
666 private long getBytesInflated() {
667 long inB = inf.getBytesRead();
668 if (current.bytesReadFromStream >= TWO_EXP_32) {
669 while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
670 inB += TWO_EXP_32;
671 }
672 }
673 return inB;
674 }
675
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000676 private int fill() throws IOException {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000677 if (closed) {
678 throw new IOException("The stream is closed");
679 }
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000680 int length = in.read(buf.array());
681 if (length > 0) {
682 buf.limit(length);
683 count(buf.limit());
684 inf.setInput(buf.array(), 0, buf.limit());
Stefan Bodewig008ca942009-03-26 22:29:59 +0000685 }
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000686 return length;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000687 }
688
689 private void readFully(byte[] b) throws IOException {
Stefan Bodewig3685eea2013-10-04 13:17:32 +0000690 int count = IOUtils.readFully(in, b);
691 count(count);
692 if (count < b.length) {
693 throw new EOFException();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000694 }
695 }
Stefan Bodewig1154e7b2010-03-17 15:47:57 +0000696
697 private void readDataDescriptor() throws IOException {
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000698 readFully(WORD_BUF);
699 ZipLong val = new ZipLong(WORD_BUF);
Stefan Bodewig1154e7b2010-03-17 15:47:57 +0000700 if (ZipLong.DD_SIG.equals(val)) {
701 // data descriptor with signature, skip sig
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000702 readFully(WORD_BUF);
703 val = new ZipLong(WORD_BUF);
Stefan Bodewig1154e7b2010-03-17 15:47:57 +0000704 }
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000705 current.entry.setCrc(val.getValue());
Stefan Bodewig1e708a02011-08-03 09:55:53 +0000706
707 // if there is a ZIP64 extra field, sizes are eight bytes
708 // each, otherwise four bytes each. Unfortunately some
709 // implementations - namely Java7 - use eight bytes without
710 // using a ZIP64 extra field -
711 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588
712
713 // just read 16 bytes and check whether bytes nine to twelve
714 // look like one of the signatures of what could follow a data
715 // descriptor (ignoring archive decryption headers for now).
716 // If so, push back eight bytes and assume sizes are four
717 // bytes, otherwise sizes are eight bytes each.
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000718 readFully(TWO_DWORD_BUF);
719 ZipLong potentialSig = new ZipLong(TWO_DWORD_BUF, DWORD);
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000720 if (potentialSig.equals(ZipLong.CFH_SIG) || potentialSig.equals(ZipLong.LFH_SIG)) {
Stefan Bodewiga2f978e2013-01-05 19:28:42 +0000721 pushback(TWO_DWORD_BUF, DWORD, DWORD);
722 current.entry.setCompressedSize(ZipLong.getValue(TWO_DWORD_BUF));
723 current.entry.setSize(ZipLong.getValue(TWO_DWORD_BUF, WORD));
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000724 } else {
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000725 current.entry.setCompressedSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF));
726 current.entry.setSize(ZipEightByteInteger.getLongValue(TWO_DWORD_BUF, DWORD));
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000727 }
Stefan Bodewig1154e7b2010-03-17 15:47:57 +0000728 }
729
Stefan Bodewig02e2be62010-03-17 15:55:00 +0000730 /**
731 * Whether this entry requires a data descriptor this library can work with.
732 *
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000733 * @return true if allowStoredEntriesWithDataDescriptor is true,
734 * the entry doesn't require any data descriptor or the method is
735 * DEFLATED.
Stefan Bodewig02e2be62010-03-17 15:55:00 +0000736 */
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000737 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
Stefan Bodewig3fd6f602013-12-19 13:39:09 +0000738 return !entry.getGeneralPurposeBit().usesDataDescriptor()
739
740 || (allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED)
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000741 || entry.getMethod() == ZipEntry.DEFLATED;
Stefan Bodewig02e2be62010-03-17 15:55:00 +0000742 }
743
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000744 /**
745 * Caches a stored entry that uses the data descriptor.
746 *
747 * <ul>
748 * <li>Reads a stored entry until the signature of a local file
749 * header, central directory header or data descriptor has been
750 * found.</li>
751 * <li>Stores all entry data in lastStoredEntry.</p>
752 * <li>Rewinds the stream to position at the data
753 * descriptor.</li>
754 * <li>reads the data descriptor</li>
755 * </ul>
756 *
757 * <p>After calling this method the entry should know its size,
758 * the entry's data is cached and the stream is positioned at the
759 * next local file or central directory header.</p>
760 */
761 private void readStoredEntry() throws IOException {
762 ByteArrayOutputStream bos = new ByteArrayOutputStream();
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000763 int off = 0;
764 boolean done = false;
765
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000766 // length of DD without signature
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000767 int ddLen = current.usesZip64 ? WORD + 2 * DWORD : 3 * WORD;
Stefan Bodewig6c5f04b2011-07-25 20:14:59 +0000768
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000769 while (!done) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000770 int r = in.read(buf.array(), off, ZipArchiveOutputStream.BUFFER_SIZE - off);
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000771 if (r <= 0) {
772 // read the whole archive without ever finding a
773 // central directory
774 throw new IOException("Truncated ZIP file");
775 }
776 if (r + off < 4) {
Emmanuel Bourg8301ee72013-12-18 22:10:19 +0000777 // buffer too small to check for a signature, loop
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000778 off += r;
779 continue;
780 }
781
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000782 done = bufferContainsSignature(bos, off, r, ddLen);
783 if (!done) {
784 off = cacheBytesRead(bos, off, r, ddLen);
785 }
786 }
787
788 byte[] b = bos.toByteArray();
789 lastStoredEntry = new ByteArrayInputStream(b);
790 }
791
792 private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
793 private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
794 private static final byte[] DD = ZipLong.DD_SIG.getBytes();
795
796 /**
797 * Checks whether the current buffer contains the signature of a
Stefan Bodewig26210c62014-02-21 14:48:33 +0000798 * &quot;data descriptor&quot;, &quot;local file header&quot; or
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000799 * &quot;central directory entry&quot;.
800 *
801 * <p>If it contains such a signature, reads the data descriptor
802 * and positions the stream right after the data descriptor.</p>
803 */
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000804 private boolean bufferContainsSignature(ByteArrayOutputStream bos, int offset, int lastRead, int expectedDDLen)
805 throws IOException {
806
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000807 boolean done = false;
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000808 int readTooMuch = 0;
809 for (int i = 0; !done && i < lastRead - 4; i++) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000810 if (buf.array()[i] == LFH[0] && buf.array()[i + 1] == LFH[1]) {
811 if ((buf.array()[i + 2] == LFH[2] && buf.array()[i + 3] == LFH[3])
812 || (buf.array()[i] == CFH[2] && buf.array()[i + 3] == CFH[3])) {
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000813 // found a LFH or CFH:
814 readTooMuch = offset + lastRead - i - expectedDDLen;
815 done = true;
816 }
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000817 else if (buf.array()[i + 2] == DD[2] && buf.array()[i + 3] == DD[3]) {
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000818 // found DD:
819 readTooMuch = offset + lastRead - i;
820 done = true;
821 }
822 if (done) {
823 // * push back bytes read in excess as well as the data
824 // descriptor
825 // * copy the remaining bytes to cache
826 // * read data descriptor
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000827 pushback(buf.array(), offset + lastRead - readTooMuch, readTooMuch);
828 bos.write(buf.array(), 0, i);
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000829 readDataDescriptor();
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000830 }
831 }
Stefan Bodewig9dd152d2011-08-11 14:22:37 +0000832 }
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000833 return done;
834 }
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000835
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000836 /**
837 * If the last read bytes could hold a data descriptor and an
838 * incomplete signature then save the last bytes to the front of
839 * the buffer and cache everything in front of the potential data
840 * descriptor into the given ByteArrayOutputStream.
841 *
842 * <p>Data descriptor plus incomplete signature (3 bytes in the
843 * worst case) can be 20 bytes max.</p>
844 */
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000845 private int cacheBytesRead(ByteArrayOutputStream bos, int offset, int lastRead, int expecteDDLen) {
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000846 final int cacheable = offset + lastRead - expecteDDLen - 3;
847 if (cacheable > 0) {
Emmanuel Bourg29f975e2013-12-18 23:14:26 +0000848 bos.write(buf.array(), 0, cacheable);
849 System.arraycopy(buf.array(), cacheable, buf.array(), 0, expecteDDLen + 3);
Stefan Bodewig794c20f2011-08-11 14:08:54 +0000850 offset = expecteDDLen + 3;
851 } else {
852 offset += lastRead;
853 }
854 return offset;
Stefan Bodewig03e94a42010-03-19 15:08:23 +0000855 }
Stefan Bodewig1e708a02011-08-03 09:55:53 +0000856
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000857 private void pushback(byte[] buf, int offset, int length) throws IOException {
Stefan Bodewig1e708a02011-08-03 09:55:53 +0000858 ((PushbackInputStream) in).unread(buf, offset, length);
859 pushedBackBytes(length);
860 }
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000861
Stefan Bodewig7181d542013-01-22 12:45:24 +0000862 // End of Central Directory Record
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000863 // end of central dir signature WORD
864 // number of this disk SHORT
Stefan Bodewig7181d542013-01-22 12:45:24 +0000865 // number of the disk with the
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000866 // start of the central directory SHORT
Stefan Bodewig7181d542013-01-22 12:45:24 +0000867 // total number of entries in the
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000868 // central directory on this disk SHORT
Stefan Bodewig7181d542013-01-22 12:45:24 +0000869 // total number of entries in
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000870 // the central directory SHORT
871 // size of the central directory WORD
Stefan Bodewig7181d542013-01-22 12:45:24 +0000872 // offset of start of central
873 // directory with respect to
Stefan Bodewigb86d8a62013-07-17 14:37:22 +0000874 // the starting disk number WORD
875 // .ZIP file comment length SHORT
876 // .ZIP file comment up to 64KB
Stefan Bodewig7181d542013-01-22 12:45:24 +0000877 //
878
879 /**
880 * Reads the stream until it find the "End of central directory
881 * record" and consumes it as well.
882 */
883 private void skipRemainderOfArchive() throws IOException {
884 // skip over central directory. One LFH has been read too much
885 // already. The calculation discounts file names and extra
886 // data so it will be too short.
887 realSkip(entriesRead * CFH_LEN - LFH_LEN);
888 findEocdRecord();
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000889 realSkip(ZipFile.MIN_EOCD_SIZE - WORD /* signature */ - SHORT /* comment len */);
Stefan Bodewig7181d542013-01-22 12:45:24 +0000890 readFully(SHORT_BUF);
891 // file comment
892 realSkip(ZipShort.getValue(SHORT_BUF));
893 }
894
895 /**
896 * Reads forward until the signature of the &quot;End of central
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000897 * directory&quot; record is found.
Stefan Bodewig7181d542013-01-22 12:45:24 +0000898 */
899 private void findEocdRecord() throws IOException {
900 int currentByte = -1;
901 boolean skipReadCall = false;
902 while (skipReadCall || (currentByte = readOneByte()) > -1) {
903 skipReadCall = false;
904 if (!isFirstByteOfEocdSig(currentByte)) {
905 continue;
906 }
907 currentByte = readOneByte();
908 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[1]) {
909 if (currentByte == -1) {
910 break;
911 }
912 skipReadCall = isFirstByteOfEocdSig(currentByte);
913 continue;
914 }
915 currentByte = readOneByte();
916 if (currentByte != ZipArchiveOutputStream.EOCD_SIG[2]) {
917 if (currentByte == -1) {
918 break;
919 }
920 skipReadCall = isFirstByteOfEocdSig(currentByte);
921 continue;
922 }
923 currentByte = readOneByte();
924 if (currentByte == -1
925 || currentByte == ZipArchiveOutputStream.EOCD_SIG[3]) {
926 break;
927 }
928 skipReadCall = isFirstByteOfEocdSig(currentByte);
929 }
930 }
931
932 /**
933 * Skips bytes by reading from the underlying stream rather than
934 * the (potentially inflating) archive stream - which {@link
935 * #skip} would do.
936 *
937 * Also updates bytes-read counter.
938 */
939 private void realSkip(long value) throws IOException {
940 if (value >= 0) {
941 long skipped = 0;
942 while (skipped < value) {
943 long rem = value - skipped;
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000944 int x = in.read(SKIP_BUF, 0, (int) (SKIP_BUF.length > rem ? rem : SKIP_BUF.length));
Stefan Bodewig7181d542013-01-22 12:45:24 +0000945 if (x == -1) {
946 return;
947 }
948 count(x);
949 skipped += x;
950 }
951 return;
952 }
953 throw new IllegalArgumentException();
954 }
955
956 /**
957 * Reads bytes by reading from the underlying stream rather than
Emmanuel Bourg429462a2013-12-19 09:53:17 +0000958 * the (potentially inflating) archive stream - which {@link #read} would do.
Stefan Bodewig7181d542013-01-22 12:45:24 +0000959 *
960 * Also updates bytes-read counter.
961 */
962 private int readOneByte() throws IOException {
963 int b = in.read();
964 if (b != -1) {
965 count(1);
966 }
967 return b;
968 }
969
970 private boolean isFirstByteOfEocdSig(int b) {
971 return b == ZipArchiveOutputStream.EOCD_SIG[0];
972 }
973
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000974 /**
975 * Structure collecting information for the entry that is
976 * currently being read.
977 */
978 private static final class CurrentEntry {
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000979
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000980 /**
981 * Current ZIP entry.
982 */
983 private final ZipArchiveEntry entry = new ZipArchiveEntry();
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000984
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000985 /**
986 * Does the entry use a data descriptor?
987 */
988 private boolean hasDataDescriptor;
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000989
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000990 /**
991 * Does the entry have a ZIP64 extended information extra field.
992 */
993 private boolean usesZip64;
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +0000994
Stefan Bodewig04e132b2011-08-03 13:08:33 +0000995 /**
996 * Number of bytes of entry content read by the client if the
997 * entry is STORED.
998 */
999 private long bytesRead;
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +00001000
Stefan Bodewig04e132b2011-08-03 13:08:33 +00001001 /**
1002 * Number of bytes of entry content read so from the stream.
1003 *
1004 * <p>This may be more than the actual entry's length as some
1005 * stuff gets buffered up and needs to be pushed back when the
1006 * end of the entry has been reached.</p>
1007 */
1008 private long bytesReadFromStream;
Emmanuel Bourgd11e8d12013-12-19 09:35:28 +00001009
1010 /**
1011 * The checksum calculated as the current entry is read.
1012 */
1013 private final CRC32 crc = new CRC32();
Emmanuel Bourg1b47ffe2013-12-19 12:30:01 +00001014
1015 /**
1016 * The input stream decompressing the data for shrunk and imploded entries.
1017 */
1018 private InputStream in;
1019 }
1020
1021 /**
1022 * Bounded input stream adapted from commons-io
1023 */
1024 private class BoundedInputStream extends InputStream {
1025
1026 /** the wrapped input stream */
1027 private final InputStream in;
1028
1029 /** the max length to provide */
1030 private final long max;
1031
1032 /** the number of bytes already returned */
1033 private long pos = 0;
1034
1035 /**
1036 * Creates a new <code>BoundedInputStream</code> that wraps the given input
1037 * stream and limits it to a certain size.
1038 *
1039 * @param in The wrapped input stream
1040 * @param size The maximum number of bytes to return
1041 */
1042 public BoundedInputStream(final InputStream in, final long size) {
1043 this.max = size;
1044 this.in = in;
1045 }
1046
1047 @Override
1048 public int read() throws IOException {
1049 if (max >= 0 && pos >= max) {
1050 return -1;
1051 }
1052 final int result = in.read();
1053 pos++;
1054 count(1);
1055 current.bytesReadFromStream++;
1056 return result;
1057 }
1058
1059 @Override
1060 public int read(final byte[] b) throws IOException {
1061 return this.read(b, 0, b.length);
1062 }
1063
1064 @Override
1065 public int read(final byte[] b, final int off, final int len) throws IOException {
1066 if (max >= 0 && pos >= max) {
1067 return -1;
1068 }
1069 final long maxRead = max >= 0 ? Math.min(len, max - pos) : len;
1070 final int bytesRead = in.read(b, off, (int) maxRead);
1071
1072 if (bytesRead == -1) {
1073 return -1;
1074 }
1075
1076 pos += bytesRead;
1077 count(bytesRead);
1078 current.bytesReadFromStream += bytesRead;
1079 return bytesRead;
1080 }
1081
1082 @Override
1083 public long skip(final long n) throws IOException {
1084 final long toSkip = max >= 0 ? Math.min(n, max - pos) : n;
1085 final long skippedBytes = in.skip(toSkip);
1086 pos += skippedBytes;
1087 return skippedBytes;
1088 }
1089
1090 @Override
1091 public int available() throws IOException {
1092 if (max >= 0 && pos >= max) {
1093 return 0;
1094 }
1095 return in.available();
1096 }
Stefan Bodewig04e132b2011-08-03 13:08:33 +00001097 }
Torsten Curdtca165392008-07-10 10:17:44 +00001098}