blob: befee541cfa1f45d8395edf7ad6ab02b8de86531 [file] [log] [blame]
Torsten Curdtca165392008-07-10 10:17:44 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19package org.apache.commons.compress.archivers.zip;
20
Stefan Bodewig008ca942009-03-26 22:29:59 +000021import java.io.EOFException;
Torsten Curdtca165392008-07-10 10:17:44 +000022import java.io.IOException;
23import java.io.InputStream;
Stefan Bodewig008ca942009-03-26 22:29:59 +000024import java.io.PushbackInputStream;
25import java.util.zip.CRC32;
26import java.util.zip.DataFormatException;
27import java.util.zip.Inflater;
28import java.util.zip.ZipException;
Torsten Curdtca165392008-07-10 10:17:44 +000029
30import org.apache.commons.compress.archivers.ArchiveEntry;
31import org.apache.commons.compress.archivers.ArchiveInputStream;
32
Sebastian Bazley99870ef2009-03-28 00:04:36 +000033/**
Sebastian Bazleyf7f6b182009-03-31 10:36:25 +000034 * Implements an input stream that can read Zip archives.
35 * <p>
36 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
37 * is not available from the header.
38 * <p>
39 * The {@link ZipFile} class is preferred when reading from files.
40 *
41 * @see ZipFile
Sebastian Bazley99870ef2009-03-28 00:04:36 +000042 * @NotThreadSafe
43 */
Torsten Curdtca165392008-07-10 10:17:44 +000044public class ZipArchiveInputStream extends ArchiveInputStream {
45
Stefan Bodewig008ca942009-03-26 22:29:59 +000046 private static final int SHORT = 2;
47 private static final int WORD = 4;
48
49 /**
50 * The zip encoding to use for filenames and the file comment.
51 */
52 private final ZipEncoding zipEncoding;
53
54 /**
55 * Whether to look for and use Unicode extra fields.
56 */
Stefan Bodewigf84dd362009-04-28 08:31:15 +000057 private final boolean useUnicodeExtraFields;
Stefan Bodewig008ca942009-03-26 22:29:59 +000058
59 private final InputStream in;
60
61 private final Inflater inf = new Inflater(true);
62 private final CRC32 crc = new CRC32();
63
Stefan Bodewig008ca942009-03-26 22:29:59 +000064 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
65
66 private ZipArchiveEntry current = null;
67 private boolean closed = false;
68 private boolean hitCentralDirectory = false;
69 private int readBytesOfEntry = 0, offsetInBuffer = 0;
70 private int bytesReadFromStream = 0;
71 private int lengthOfLastRead = 0;
72 private boolean hasDataDescriptor = false;
73
74 private static final int LFH_LEN = 30;
75 /*
76 local file header signature 4 bytes (0x04034b50)
77 version needed to extract 2 bytes
78 general purpose bit flag 2 bytes
79 compression method 2 bytes
80 last mod file time 2 bytes
81 last mod file date 2 bytes
82 crc-32 4 bytes
83 compressed size 4 bytes
84 uncompressed size 4 bytes
85 file name length 2 bytes
86 extra field length 2 bytes
87 */
Torsten Curdtca165392008-07-10 10:17:44 +000088
Stefan Bodewig743d7c52009-02-05 11:01:35 +000089 public ZipArchiveInputStream(InputStream inputStream) {
Stefan Bodewig008ca942009-03-26 22:29:59 +000090 this(inputStream, ZipEncodingHelper.UTF8, true);
91 }
92
93 /**
94 * @param encoding the encoding to use for file names, use null
95 * for the platform's default encoding
96 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
97 * Extra Fields (if present) to set the file names.
98 */
99 public ZipArchiveInputStream(InputStream inputStream,
100 String encoding,
101 boolean useUnicodeExtraFields) {
102 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
103 this.useUnicodeExtraFields = useUnicodeExtraFields;
104 in = new PushbackInputStream(inputStream, buf.length);
Stefan Bodewig743d7c52009-02-05 11:01:35 +0000105 }
Torsten Curdtca165392008-07-10 10:17:44 +0000106
Stefan Bodewiga7049ab2009-02-11 07:44:00 +0000107 public ZipArchiveEntry getNextZipEntry() throws IOException {
Stefan Bodewig008ca942009-03-26 22:29:59 +0000108 if (closed || hitCentralDirectory) {
Stefan Bodewigfa8fea72009-02-06 08:49:49 +0000109 return null;
110 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000111 if (current != null) {
112 closeEntry();
113 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000114 byte[] lfh = new byte[LFH_LEN];
115 try {
116 readFully(lfh);
117 } catch (EOFException e) {
118 return null;
119 }
120 ZipLong sig = new ZipLong(lfh);
121 if (sig.equals(ZipLong.CFH_SIG)) {
122 hitCentralDirectory = true;
123 return null;
124 }
125 if (!sig.equals(ZipLong.LFH_SIG)) {
126 return null;
127 }
128
129 int off = WORD;
130 current = new ZipArchiveEntry();
131
132 int versionMadeBy = ZipShort.getValue(lfh, off);
133 off += SHORT;
134 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
135 & ZipFile.NIBLET_MASK);
136
137 final int generalPurposeFlag = ZipShort.getValue(lfh, off);
138 final boolean hasEFS =
139 (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
140 final ZipEncoding entryEncoding =
141 hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
142 hasDataDescriptor = (generalPurposeFlag & 8) != 0;
143
144 off += SHORT;
145
146 current.setMethod(ZipShort.getValue(lfh, off));
147 off += SHORT;
148
149 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
150 current.setTime(time);
151 off += WORD;
152
153 if (!hasDataDescriptor) {
154 current.setCrc(ZipLong.getValue(lfh, off));
155 off += WORD;
156
157 current.setCompressedSize(ZipLong.getValue(lfh, off));
158 off += WORD;
159
160 current.setSize(ZipLong.getValue(lfh, off));
161 off += WORD;
162 } else {
163 off += 3 * WORD;
164 }
165
166 int fileNameLen = ZipShort.getValue(lfh, off);
167
168 off += SHORT;
169
170 int extraLen = ZipShort.getValue(lfh, off);
171 off += SHORT;
172
173 byte[] fileName = new byte[fileNameLen];
174 readFully(fileName);
175 current.setName(entryEncoding.decode(fileName));
176
177 byte[] extraData = new byte[extraLen];
178 readFully(extraData);
179 current.setExtra(extraData);
180
Stefan Bodewigf84dd362009-04-28 08:31:15 +0000181 if (!hasEFS && useUnicodeExtraFields) {
182 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
183 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000184 return current;
Stefan Bodewiga7049ab2009-02-11 07:44:00 +0000185 }
186
187 public ArchiveEntry getNextEntry() throws IOException {
188 return getNextZipEntry();
Torsten Curdtca165392008-07-10 10:17:44 +0000189 }
190
Stefan Bodewig008ca942009-03-26 22:29:59 +0000191 public int read(byte[] buffer, int start, int length) throws IOException {
192 if (closed) {
193 throw new IOException("The stream is closed");
194 }
195 if (inf.finished() || current == null) {
196 return -1;
197 }
198
199 // avoid int overflow, check null buffer
200 if (start <= buffer.length && length >= 0 && start >= 0
201 && buffer.length - start >= length) {
202 if (current.getMethod() == ZipArchiveOutputStream.STORED) {
203 int csize = (int) current.getSize();
204 if (readBytesOfEntry >= csize) {
205 return -1;
206 }
207 if (offsetInBuffer >= lengthOfLastRead) {
208 offsetInBuffer = 0;
209 if ((lengthOfLastRead = in.read(buf)) == -1) {
210 return -1;
211 }
Christian Grobmeier4bce1fb2009-04-10 15:36:15 +0000212 count(lengthOfLastRead);
Stefan Bodewig008ca942009-03-26 22:29:59 +0000213 bytesReadFromStream += lengthOfLastRead;
214 }
215 int toRead = length > lengthOfLastRead
216 ? lengthOfLastRead - offsetInBuffer
217 : length;
218 if ((csize - readBytesOfEntry) < toRead) {
219 toRead = csize - readBytesOfEntry;
220 }
221 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
222 offsetInBuffer += toRead;
223 readBytesOfEntry += toRead;
224 crc.update(buffer, start, toRead);
225 return toRead;
226 }
227 if (inf.needsInput()) {
228 fill();
229 if (lengthOfLastRead > 0) {
230 bytesReadFromStream += lengthOfLastRead;
231 }
232 }
233 int read = 0;
234 try {
235 read = inf.inflate(buffer, start, length);
236 } catch (DataFormatException e) {
237 throw new ZipException(e.getMessage());
238 }
239 if (read == 0 && inf.finished()) {
240 return -1;
241 }
242 crc.update(buffer, start, read);
243 return read;
244 }
245 throw new ArrayIndexOutOfBoundsException();
246 }
247
248 public void close() throws IOException {
249 if (!closed) {
250 closed = true;
251 in.close();
252 }
Torsten Curdtca165392008-07-10 10:17:44 +0000253 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000254
Stefan Bodewig008ca942009-03-26 22:29:59 +0000255 public long skip(long value) throws IOException {
256 if (value >= 0) {
257 long skipped = 0;
258 byte[] b = new byte[1024];
259 while (skipped != value) {
260 long rem = value - skipped;
261 int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
262 if (x == -1) {
263 return skipped;
264 }
265 skipped += x;
266 }
267 return skipped;
268 }
269 throw new IllegalArgumentException();
270 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000271
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000272 /*
273 * This test assumes that the zip file does not have any additional leading content,
274 * which is something that is allowed by the specification (e.g. self-extracting zips)
275 */
Stefan Bodewigeadbe112009-02-26 09:31:43 +0000276 public static boolean matches(byte[] signature, int length) {
277 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
Stefan Bodewigdff90012009-02-06 08:59:14 +0000278 return false;
279 }
280
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000281 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
282 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
283 }
284
285 private static boolean checksig(byte[] signature, byte[] expected){
286 for (int i = 0; i < expected.length; i++) {
287 if (signature[i] != expected[i]) {
Stefan Bodewigeadbe112009-02-26 09:31:43 +0000288 return false;
289 }
Stefan Bodewigfa8fea72009-02-06 08:49:49 +0000290 }
Sebastian Bazleyfa526cb2009-03-30 00:39:05 +0000291 return true;
Torsten Curdtca165392008-07-10 10:17:44 +0000292 }
Stefan Bodewig008ca942009-03-26 22:29:59 +0000293
294 private void closeEntry() throws IOException {
295 if (closed) {
296 throw new IOException("The stream is closed");
297 }
298 if (current == null) {
299 return;
300 }
301 // Ensure all entry bytes are read
302 skip(Long.MAX_VALUE);
Sebastian Bazley7ae0eec2009-04-25 14:55:05 +0000303 int inB;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000304 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
305 inB = inf.getTotalIn();
Stefan Bodewig008ca942009-03-26 22:29:59 +0000306 } else {
307 inB = readBytesOfEntry;
Stefan Bodewig008ca942009-03-26 22:29:59 +0000308 }
309 int diff = 0;
310
311 // Pushback any required bytes
312 if ((diff = bytesReadFromStream - inB) != 0) {
313 ((PushbackInputStream) in).unread(buf,
314 lengthOfLastRead - diff, diff);
315 }
316
317 if (hasDataDescriptor) {
318 readFully(new byte[4 * WORD]);
319 }
320
321 inf.reset();
322 readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
323 lengthOfLastRead = 0;
324 crc.reset();
325 current = null;
326 }
327
328 private void fill() throws IOException {
329 if (closed) {
330 throw new IOException("The stream is closed");
331 }
332 if ((lengthOfLastRead = in.read(buf)) > 0) {
333 inf.setInput(buf, 0, lengthOfLastRead);
334 }
335 }
336
337 private void readFully(byte[] b) throws IOException {
338 int count = 0, x = 0;
339 while (count != b.length) {
340 count += x = in.read(b, count, b.length - count);
341 if (x == -1) {
342 throw new EOFException();
343 }
344 }
345 }
Torsten Curdtca165392008-07-10 10:17:44 +0000346}