blob: d29e4e7f27e08203eade8941dfdf8774cb46ed42 [file] [log] [blame]
Torsten Curdte190c402009-01-12 11:29:53 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 */
18package org.apache.commons.compress.archivers.zip;
19
20import java.io.File;
21import java.io.IOException;
22import java.io.InputStream;
23import java.io.RandomAccessFile;
24import java.io.UnsupportedEncodingException;
25import java.util.Calendar;
Stefan Bodewig2ae5e282009-02-04 08:45:31 +000026import java.util.Collections;
Torsten Curdte190c402009-01-12 11:29:53 +000027import java.util.Date;
28import java.util.Enumeration;
Stefan Bodewig2ae5e282009-02-04 08:45:31 +000029import java.util.HashMap;
30import java.util.Map;
Torsten Curdte190c402009-01-12 11:29:53 +000031import java.util.zip.Inflater;
32import java.util.zip.InflaterInputStream;
33import java.util.zip.ZipException;
34
/**
 * Replacement for <code>java.util.zip.ZipFile</code>.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives. Furthermore it returns instances of this package's
 * <code>ZipEntry</code> instead of
 * <code>java.util.zip.ZipEntry</code>.</p>
 *
 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
 * have to reimplement all methods anyway. Like
 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:</p>
 *
 * <ul>
 * <li>There is no getName method.</li>
 * <li>entries has been renamed to getEntries.</li>
 * <li>getEntries and getEntry return this package's
 * <code>ZipEntry</code> instances.</li>
 * <li>close is allowed to throw IOException.</li>
 * </ul>
 *
 */
62public class ZipFile {
63 private static final int HASH_SIZE = 509;
64 private static final int SHORT = 2;
65 private static final int WORD = 4;
66 private static final int NIBLET_MASK = 0x0f;
67 private static final int BYTE_SHIFT = 8;
68 private static final int POS_0 = 0;
69 private static final int POS_1 = 1;
70 private static final int POS_2 = 2;
71 private static final int POS_3 = 3;
72
73 /**
74 * Maps ZipEntrys to Longs, recording the offsets of the local
75 * file headers.
76 */
Stefan Bodewig2ae5e282009-02-04 08:45:31 +000077 private final Map entries = new HashMap(HASH_SIZE);
Torsten Curdte190c402009-01-12 11:29:53 +000078
79 /**
80 * Maps String to ZipEntrys, name -> actual entry.
81 */
Stefan Bodewig2ae5e282009-02-04 08:45:31 +000082 private final Map nameMap = new HashMap(HASH_SIZE);
Torsten Curdte190c402009-01-12 11:29:53 +000083
84 private static final class OffsetEntry {
85 private long headerOffset = -1;
86 private long dataOffset = -1;
87 }
88
89 /**
90 * The encoding to use for filenames and the file comment.
91 *
92 * <p>For a list of possible values see <a
93 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
94 * Defaults to the platform's default character encoding.</p>
95 */
96 private String encoding = null;
97
98 /**
99 * The actual data source.
100 */
101 private RandomAccessFile archive;
102
103 /**
104 * Opens the given file for reading, assuming the platform's
105 * native encoding for file names.
106 *
107 * @param f the archive.
108 *
109 * @throws IOException if an error occurs while reading the file.
110 */
111 public ZipFile(File f) throws IOException {
112 this(f, null);
113 }
114
115 /**
116 * Opens the given file for reading, assuming the platform's
117 * native encoding for file names.
118 *
119 * @param name name of the archive.
120 *
121 * @throws IOException if an error occurs while reading the file.
122 */
123 public ZipFile(String name) throws IOException {
124 this(new File(name), null);
125 }
126
127 /**
128 * Opens the given file for reading, assuming the specified
129 * encoding for file names.
130 *
131 * @param name name of the archive.
132 * @param encoding the encoding to use for file names
133 *
134 * @throws IOException if an error occurs while reading the file.
135 */
136 public ZipFile(String name, String encoding) throws IOException {
137 this(new File(name), encoding);
138 }
139
140 /**
141 * Opens the given file for reading, assuming the specified
142 * encoding for file names.
143 *
144 * @param f the archive.
145 * @param encoding the encoding to use for file names
146 *
147 * @throws IOException if an error occurs while reading the file.
148 */
149 public ZipFile(File f, String encoding) throws IOException {
150 this.encoding = encoding;
151 archive = new RandomAccessFile(f, "r");
Stefan Bodewig4669f292009-02-04 04:56:10 +0000152 boolean success = false;
Torsten Curdte190c402009-01-12 11:29:53 +0000153 try {
154 populateFromCentralDirectory();
155 resolveLocalFileHeaderData();
Stefan Bodewig4669f292009-02-04 04:56:10 +0000156 success = true;
157 } finally {
158 if (!success) {
159 try {
160 archive.close();
161 } catch (IOException e2) {
162 // swallow, throw the original exception instead
163 }
Torsten Curdte190c402009-01-12 11:29:53 +0000164 }
Torsten Curdte190c402009-01-12 11:29:53 +0000165 }
166 }
167
168 /**
169 * The encoding to use for filenames and the file comment.
170 *
171 * @return null if using the platform's default character encoding.
172 */
173 public String getEncoding() {
174 return encoding;
175 }
176
177 /**
178 * Closes the archive.
179 * @throws IOException if an error occurs closing the archive.
180 */
181 public void close() throws IOException {
182 archive.close();
183 }
184
185 /**
186 * close a zipfile quietly; throw no io fault, do nothing
187 * on a null parameter
188 * @param zipfile file to close, can be null
189 */
190 public static void closeQuietly(ZipFile zipfile) {
191 if (zipfile != null) {
192 try {
193 zipfile.close();
194 } catch (IOException e) {
195 //ignore
196 }
197 }
198 }
199
200 /**
201 * Returns all entries.
202 * @return all entries as {@link ZipEntry} instances
203 */
204 public Enumeration getEntries() {
Stefan Bodewig2ae5e282009-02-04 08:45:31 +0000205 return Collections.enumeration(entries.keySet());
Torsten Curdte190c402009-01-12 11:29:53 +0000206 }
207
208 /**
209 * Returns a named entry - or <code>null</code> if no entry by
210 * that name exists.
211 * @param name name of the entry.
212 * @return the ZipEntry corresponding to the given name - or
213 * <code>null</code> if not present.
214 */
215 public ZipEntry getEntry(String name) {
216 return (ZipEntry) nameMap.get(name);
217 }
218
219 /**
220 * Returns an InputStream for reading the contents of the given entry.
221 * @param ze the entry to get the stream for.
222 * @return a stream to read the entry from.
223 * @throws IOException if unable to create an input stream from the zipenty
224 * @throws ZipException if the zipentry has an unsupported compression method
225 */
226 public InputStream getInputStream(ZipEntry ze)
227 throws IOException, ZipException {
228 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
229 if (offsetEntry == null) {
230 return null;
231 }
232 long start = offsetEntry.dataOffset;
233 BoundedInputStream bis =
234 new BoundedInputStream(start, ze.getCompressedSize());
235 switch (ze.getMethod()) {
236 case ZipEntry.STORED:
237 return bis;
238 case ZipEntry.DEFLATED:
239 bis.addDummy();
240 return new InflaterInputStream(bis, new Inflater(true));
241 default:
242 throw new ZipException("Found unsupported compression method "
243 + ze.getMethod());
244 }
245 }
246
247 private static final int CFH_LEN =
248 /* version made by */ SHORT
249 /* version needed to extract */ + SHORT
250 /* general purpose bit flag */ + SHORT
251 /* compression method */ + SHORT
252 /* last mod file time */ + SHORT
253 /* last mod file date */ + SHORT
254 /* crc-32 */ + WORD
255 /* compressed size */ + WORD
256 /* uncompressed size */ + WORD
257 /* filename length */ + SHORT
258 /* extra field length */ + SHORT
259 /* file comment length */ + SHORT
260 /* disk number start */ + SHORT
261 /* internal file attributes */ + SHORT
262 /* external file attributes */ + WORD
263 /* relative offset of local header */ + WORD;
264
265 /**
266 * Reads the central directory of the given archive and populates
267 * the internal tables with ZipEntry instances.
268 *
269 * <p>The ZipEntrys will know all data that can be obtained from
270 * the central directory alone, but not the data that requires the
271 * local file header or additional data to be read.</p>
272 */
273 private void populateFromCentralDirectory()
274 throws IOException {
275 positionAtCentralDirectory();
276
277 byte[] cfh = new byte[CFH_LEN];
278
279 byte[] signatureBytes = new byte[WORD];
280 archive.readFully(signatureBytes);
281 long sig = ZipLong.getValue(signatureBytes);
282 final long cfhSig = ZipLong.getValue(ZipOutputStream.CFH_SIG);
283 if (sig != cfhSig && startsWithLocalFileHeader()) {
284 throw new IOException("central directory is empty, can't expand"
285 + " corrupt archive.");
286 }
287 while (sig == cfhSig) {
288 archive.readFully(cfh);
289 int off = 0;
290 ZipEntry ze = new ZipEntry();
291
292 int versionMadeBy = ZipShort.getValue(cfh, off);
293 off += SHORT;
294 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
295
296 off += WORD; // skip version info and general purpose byte
297
298 ze.setMethod(ZipShort.getValue(cfh, off));
299 off += SHORT;
300
301 // FIXME this is actually not very cpu cycles friendly as we are converting from
302 // dos to java while the underlying Sun implementation will convert
303 // from java to dos time for internal storage...
304 long time = dosToJavaTime(ZipLong.getValue(cfh, off));
305 ze.setTime(time);
306 off += WORD;
307
308 ze.setCrc(ZipLong.getValue(cfh, off));
309 off += WORD;
310
311 ze.setCompressedSize(ZipLong.getValue(cfh, off));
312 off += WORD;
313
314 ze.setSize(ZipLong.getValue(cfh, off));
315 off += WORD;
316
317 int fileNameLen = ZipShort.getValue(cfh, off);
318 off += SHORT;
319
320 int extraLen = ZipShort.getValue(cfh, off);
321 off += SHORT;
322
323 int commentLen = ZipShort.getValue(cfh, off);
324 off += SHORT;
325
326 off += SHORT; // disk number
327
328 ze.setInternalAttributes(ZipShort.getValue(cfh, off));
329 off += SHORT;
330
331 ze.setExternalAttributes(ZipLong.getValue(cfh, off));
332 off += WORD;
333
334 byte[] fileName = new byte[fileNameLen];
335 archive.readFully(fileName);
336 ze.setName(getString(fileName));
337
338
339 // LFH offset,
340 OffsetEntry offset = new OffsetEntry();
341 offset.headerOffset = ZipLong.getValue(cfh, off);
342 // data offset will be filled later
343 entries.put(ze, offset);
344
345 nameMap.put(ze.getName(), ze);
346
Stefan Bodewig5e5804c2009-02-05 12:45:23 +0000347 int lenToSkip = extraLen;
348 while (lenToSkip > 0) {
349 int skipped = archive.skipBytes(lenToSkip);
350 if (skipped <= 0) {
351 throw new RuntimeException("failed to skip extra data in"
352 + " central directory");
353 }
354 lenToSkip -= skipped;
355 }
Torsten Curdte190c402009-01-12 11:29:53 +0000356
357 byte[] comment = new byte[commentLen];
358 archive.readFully(comment);
359 ze.setComment(getString(comment));
360
361 archive.readFully(signatureBytes);
362 sig = ZipLong.getValue(signatureBytes);
363 }
364 }
365
366 private static final int MIN_EOCD_SIZE =
367 /* end of central dir signature */ WORD
368 /* number of this disk */ + SHORT
369 /* number of the disk with the */
370 /* start of the central directory */ + SHORT
371 /* total number of entries in */
372 /* the central dir on this disk */ + SHORT
373 /* total number of entries in */
374 /* the central dir */ + SHORT
375 /* size of the central directory */ + WORD
376 /* offset of start of central */
377 /* directory with respect to */
378 /* the starting disk number */ + WORD
379 /* zipfile comment length */ + SHORT;
380
381 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
382 /* maximum length of zipfile comment */ + 0xFFFF;
383
384 private static final int CFD_LOCATOR_OFFSET =
385 /* end of central dir signature */ WORD
386 /* number of this disk */ + SHORT
387 /* number of the disk with the */
388 /* start of the central directory */ + SHORT
389 /* total number of entries in */
390 /* the central dir on this disk */ + SHORT
391 /* total number of entries in */
392 /* the central dir */ + SHORT
393 /* size of the central directory */ + WORD;
394
395 /**
396 * Searches for the &quot;End of central dir record&quot;, parses
397 * it and positions the stream at the first central directory
398 * record.
399 */
400 private void positionAtCentralDirectory()
401 throws IOException {
402 boolean found = false;
403 long off = archive.length() - MIN_EOCD_SIZE;
404 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
405 if (off >= 0) {
406 archive.seek(off);
407 byte[] sig = ZipOutputStream.EOCD_SIG;
408 int curr = archive.read();
409 while (off >= stopSearching && curr != -1) {
410 if (curr == sig[POS_0]) {
411 curr = archive.read();
412 if (curr == sig[POS_1]) {
413 curr = archive.read();
414 if (curr == sig[POS_2]) {
415 curr = archive.read();
416 if (curr == sig[POS_3]) {
417 found = true;
418 break;
419 }
420 }
421 }
422 }
423 archive.seek(--off);
424 curr = archive.read();
425 }
426 }
427 if (!found) {
428 throw new ZipException("archive is not a ZIP archive");
429 }
430 archive.seek(off + CFD_LOCATOR_OFFSET);
431 byte[] cfdOffset = new byte[WORD];
432 archive.readFully(cfdOffset);
433 archive.seek(ZipLong.getValue(cfdOffset));
434 }
435
436 /**
437 * Number of bytes in local file header up to the &quot;length of
438 * filename&quot; entry.
439 */
440 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
441 /* local file header signature */ WORD
442 /* version needed to extract */ + SHORT
443 /* general purpose bit flag */ + SHORT
444 /* compression method */ + SHORT
445 /* last mod file time */ + SHORT
446 /* last mod file date */ + SHORT
447 /* crc-32 */ + WORD
448 /* compressed size */ + WORD
449 /* uncompressed size */ + WORD;
450
451 /**
452 * Walks through all recorded entries and adds the data available
453 * from the local file header.
454 *
455 * <p>Also records the offsets for the data to read from the
456 * entries.</p>
457 */
458 private void resolveLocalFileHeaderData()
459 throws IOException {
460 Enumeration e = getEntries();
461 while (e.hasMoreElements()) {
462 ZipEntry ze = (ZipEntry) e.nextElement();
463 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
464 long offset = offsetEntry.headerOffset;
465 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
466 byte[] b = new byte[SHORT];
467 archive.readFully(b);
468 int fileNameLen = ZipShort.getValue(b);
469 archive.readFully(b);
470 int extraFieldLen = ZipShort.getValue(b);
Stefan Bodewig5e5804c2009-02-05 12:45:23 +0000471 int lenToSkip = fileNameLen;
472 while (lenToSkip > 0) {
473 int skipped = archive.skipBytes(lenToSkip);
474 if (skipped <= 0) {
475 throw new RuntimeException("failed to skip file name in"
476 + " local file header");
477 }
478 lenToSkip -= skipped;
479 }
Torsten Curdte190c402009-01-12 11:29:53 +0000480 byte[] localExtraData = new byte[extraFieldLen];
481 archive.readFully(localExtraData);
482 ze.setExtra(localExtraData);
483 /*dataOffsets.put(ze,
484 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
485 + SHORT + SHORT + fileNameLen + extraFieldLen));
486 */
487 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
488 + SHORT + SHORT + fileNameLen + extraFieldLen;
489 }
490 }
491
492 /**
493 * Convert a DOS date/time field to a Date object.
494 *
495 * @param zipDosTime contains the stored DOS time.
496 * @return a Date instance corresponding to the given time.
497 */
498 protected static Date fromDosTime(ZipLong zipDosTime) {
499 long dosTime = zipDosTime.getValue();
500 return new Date(dosToJavaTime(dosTime));
501 }
502
503 /*
504 * Converts DOS time to Java time (number of milliseconds since epoch).
505 */
506 private static long dosToJavaTime(long dosTime) {
507 Calendar cal = Calendar.getInstance();
508 // CheckStyle:MagicNumberCheck OFF - no point
509 cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980);
510 cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1);
511 cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f);
512 cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f);
513 cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f);
514 cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e);
515 // CheckStyle:MagicNumberCheck ON
516 return cal.getTime().getTime();
517 }
518
519
520 /**
521 * Retrieve a String from the given bytes using the encoding set
522 * for this ZipFile.
523 *
524 * @param bytes the byte array to transform
525 * @return String obtained by using the given encoding
526 * @throws ZipException if the encoding cannot be recognized.
527 */
528 protected String getString(byte[] bytes) throws ZipException {
529 if (encoding == null) {
530 return new String(bytes);
531 } else {
532 try {
533 return new String(bytes, encoding);
534 } catch (UnsupportedEncodingException uee) {
535 throw new ZipException(uee.getMessage());
536 }
537 }
538 }
539
540 /**
541 * Checks whether the archive starts with a LFH. If it doesn't,
542 * it may be an empty archive.
543 */
544 private boolean startsWithLocalFileHeader() throws IOException {
545 archive.seek(0);
546 final byte[] start = new byte[WORD];
547 archive.readFully(start);
548 for (int i = 0; i < start.length; i++) {
549 if (start[i] != ZipOutputStream.LFH_SIG[i]) {
550 return false;
551 }
552 }
553 return true;
554 }
555
556 /**
557 * InputStream that delegates requests to the underlying
558 * RandomAccessFile, making sure that only bytes from a certain
559 * range can be read.
560 */
561 private class BoundedInputStream extends InputStream {
562 private long remaining;
563 private long loc;
564 private boolean addDummyByte = false;
565
566 BoundedInputStream(long start, long remaining) {
567 this.remaining = remaining;
568 loc = start;
569 }
570
571 public int read() throws IOException {
572 if (remaining-- <= 0) {
573 if (addDummyByte) {
574 addDummyByte = false;
575 return 0;
576 }
577 return -1;
578 }
579 synchronized (archive) {
580 archive.seek(loc++);
581 return archive.read();
582 }
583 }
584
585 public int read(byte[] b, int off, int len) throws IOException {
586 if (remaining <= 0) {
587 if (addDummyByte) {
588 addDummyByte = false;
589 b[off] = 0;
590 return 1;
591 }
592 return -1;
593 }
594
595 if (len <= 0) {
596 return 0;
597 }
598
599 if (len > remaining) {
600 len = (int) remaining;
601 }
602 int ret = -1;
603 synchronized (archive) {
604 archive.seek(loc);
605 ret = archive.read(b, off, len);
606 }
607 if (ret > 0) {
608 loc += ret;
609 remaining -= ret;
610 }
611 return ret;
612 }
613
614 /**
615 * Inflater needs an extra dummy byte for nowrap - see
616 * Inflater's javadocs.
617 */
618 void addDummy() {
619 addDummyByte = true;
620 }
621 }
622
623}