blob: 6c701dd0b67347a58f906475b4e5715471b566ab [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Read-only access to Zip archives, with minimal heap allocation.
19//
20#define LOG_TAG "zipro"
21//#define LOG_NDEBUG 0
Mathias Agopian8ae23352009-06-04 13:53:57 -070022#include <utils/ZipFileRO.h>
23#include <utils/Log.h>
24#include <utils/misc.h>
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080025
26#include <zlib.h>
27
28#include <string.h>
29#include <fcntl.h>
30#include <errno.h>
31#include <assert.h>
32
33using namespace android;
34
35/*
36 * Zip file constants.
37 */
38#define kEOCDSignature 0x06054b50
39#define kEOCDLen 22
40#define kEOCDNumEntries 8 // offset to #of entries in file
41#define kEOCDFileOffset 16 // offset to central directory
42
43#define kMaxCommentLen 65535 // longest possible in ushort
44#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen)
45
46#define kLFHSignature 0x04034b50
47#define kLFHLen 30 // excluding variable-len fields
48#define kLFHNameLen 26 // offset to filename length
49#define kLFHExtraLen 28 // offset to extra length
50
51#define kCDESignature 0x02014b50
52#define kCDELen 46 // excluding variable-len fields
53#define kCDEMethod 10 // offset to compression method
54#define kCDEModWhen 12 // offset to modification timestamp
55#define kCDECRC 16 // offset to entry CRC
56#define kCDECompLen 20 // offset to compressed length
57#define kCDEUncompLen 24 // offset to uncompressed length
58#define kCDENameLen 28 // offset to filename length
59#define kCDEExtraLen 30 // offset to extra length
60#define kCDECommentLen 32 // offset to comment length
61#define kCDELocalOffset 42 // offset to local hdr
62
63/*
64 * The values we return for ZipEntryRO use 0 as an invalid value, so we
65 * want to adjust the hash table index by a fixed amount. Using a large
66 * value helps insure that people don't mix & match arguments, e.g. to
67 * findEntryByIndex().
68 */
69#define kZipEntryAdj 10000
70
71/*
72 * Convert a ZipEntryRO to a hash table index, verifying that it's in a
73 * valid range.
74 */
75int ZipFileRO::entryToIndex(const ZipEntryRO entry) const
76{
77 long ent = ((long) entry) - kZipEntryAdj;
78 if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) {
79 LOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent);
80 return -1;
81 }
82 return ent;
83}
84
85
86/*
87 * Open the specified file read-only. We memory-map the entire thing and
88 * close the file before returning.
89 */
90status_t ZipFileRO::open(const char* zipFileName)
91{
92 int fd = -1;
93 off_t length;
94
95 assert(mFileMap == NULL);
96
97 /*
98 * Open and map the specified file.
99 */
100 fd = ::open(zipFileName, O_RDONLY);
101 if (fd < 0) {
102 LOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno));
103 return NAME_NOT_FOUND;
104 }
105
106 length = lseek(fd, 0, SEEK_END);
107 if (length < 0) {
108 close(fd);
109 return UNKNOWN_ERROR;
110 }
111
112 mFileMap = new FileMap();
113 if (mFileMap == NULL) {
114 close(fd);
115 return NO_MEMORY;
116 }
117 if (!mFileMap->create(zipFileName, fd, 0, length, true)) {
118 LOGW("Unable to map '%s': %s\n", zipFileName, strerror(errno));
119 close(fd);
120 return UNKNOWN_ERROR;
121 }
122
123 mFd = fd;
124
125 /*
126 * Got it mapped, verify it and create data structures for fast access.
127 */
128 if (!parseZipArchive()) {
129 mFileMap->release();
130 mFileMap = NULL;
131 return UNKNOWN_ERROR;
132 }
133
134 return OK;
135}
136
137/*
138 * Parse the Zip archive, verifying its contents and initializing internal
139 * data structures.
140 */
141bool ZipFileRO::parseZipArchive(void)
142{
143#define CHECK_OFFSET(_off) { \
144 if ((unsigned int) (_off) >= maxOffset) { \
145 LOGE("ERROR: bad offset %u (max %d): %s\n", \
146 (unsigned int) (_off), maxOffset, #_off); \
147 goto bail; \
148 } \
149 }
150 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr();
151 const unsigned char* ptr;
152 size_t length = mFileMap->getDataLength();
153 bool result = false;
154 unsigned int i, numEntries, cdOffset;
155 unsigned int val;
156
157 /*
158 * The first 4 bytes of the file will either be the local header
159 * signature for the first file (kLFHSignature) or, if the archive doesn't
160 * have any files in it, the end-of-central-directory signature
161 * (kEOCDSignature).
162 */
163 val = get4LE(basePtr);
164 if (val == kEOCDSignature) {
165 LOGI("Found Zip archive, but it looks empty\n");
166 goto bail;
167 } else if (val != kLFHSignature) {
168 LOGV("Not a Zip archive (found 0x%08x)\n", val);
169 goto bail;
170 }
171
172 /*
173 * Find the EOCD. We'll find it immediately unless they have a file
174 * comment.
175 */
176 ptr = basePtr + length - kEOCDLen;
177
178 while (ptr >= basePtr) {
179 if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature)
180 break;
181 ptr--;
182 }
183 if (ptr < basePtr) {
184 LOGI("Could not find end-of-central-directory in Zip\n");
185 goto bail;
186 }
187
188 /*
189 * There are two interesting items in the EOCD block: the number of
190 * entries in the file, and the file offset of the start of the
191 * central directory.
192 *
193 * (There's actually a count of the #of entries in this file, and for
194 * all files which comprise a spanned archive, but for our purposes
195 * we're only interested in the current file. Besides, we expect the
196 * two to be equivalent for our stuff.)
197 */
198 numEntries = get2LE(ptr + kEOCDNumEntries);
199 cdOffset = get4LE(ptr + kEOCDFileOffset);
200
201 /* valid offsets are [0,EOCD] */
202 unsigned int maxOffset;
203 maxOffset = (ptr - basePtr) +1;
204
205 LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
206 if (numEntries == 0 || cdOffset >= length) {
207 LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
208 numEntries, cdOffset, length);
209 goto bail;
210 }
211
212 /*
213 * Create hash table. We have a minimum 75% load factor, possibly as
214 * low as 50% after we round off to a power of 2.
215 */
216 mNumEntries = numEntries;
217 mHashTableSize = roundUpPower2(1 + ((numEntries * 4) / 3));
218 mHashTable = (HashEntry*) calloc(1, sizeof(HashEntry) * mHashTableSize);
219
220 /*
221 * Walk through the central directory, adding entries to the hash
222 * table.
223 */
224 ptr = basePtr + cdOffset;
225 for (i = 0; i < numEntries; i++) {
226 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
227 const unsigned char* localHdr;
228 unsigned int hash;
229
230 if (get4LE(ptr) != kCDESignature) {
231 LOGW("Missed a central dir sig (at %d)\n", i);
232 goto bail;
233 }
234 if (ptr + kCDELen > basePtr + length) {
235 LOGW("Ran off the end (at %d)\n", i);
236 goto bail;
237 }
238
239 localHdrOffset = get4LE(ptr + kCDELocalOffset);
240 CHECK_OFFSET(localHdrOffset);
241 fileNameLen = get2LE(ptr + kCDENameLen);
242 extraLen = get2LE(ptr + kCDEExtraLen);
243 commentLen = get2LE(ptr + kCDECommentLen);
244
245 //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n",
246 // i, localHdrOffset, fileNameLen, extraLen, commentLen);
247 //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen);
248
249 /* add the CDE filename to the hash table */
250 hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
251 addToHash((const char*)ptr + kCDELen, fileNameLen, hash);
252
253 localHdr = basePtr + localHdrOffset;
254 if (get4LE(localHdr) != kLFHSignature) {
255 LOGW("Bad offset to local header: %d (at %d)\n",
256 localHdrOffset, i);
257 goto bail;
258 }
259
260 ptr += kCDELen + fileNameLen + extraLen + commentLen;
261 CHECK_OFFSET(ptr - basePtr);
262 }
263
264 result = true;
265
266bail:
267 return result;
268#undef CHECK_OFFSET
269}
270
271
272/*
273 * Simple string hash function for non-null-terminated strings.
274 */
275/*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len)
276{
277 unsigned int hash = 0;
278
279 while (len--)
280 hash = hash * 31 + *str++;
281
282 return hash;
283}
284
285/*
286 * Add a new entry to the hash table.
287 */
288void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash)
289{
290 int ent = hash & (mHashTableSize-1);
291
292 /*
293 * We over-allocate the table, so we're guaranteed to find an empty slot.
294 */
295 while (mHashTable[ent].name != NULL)
296 ent = (ent + 1) & (mHashTableSize-1);
297
298 mHashTable[ent].name = str;
299 mHashTable[ent].nameLen = strLen;
300}
301
302/*
303 * Find a matching entry.
304 *
305 * Returns 0 if not found.
306 */
307ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const
308{
309 int nameLen = strlen(fileName);
310 unsigned int hash = computeHash(fileName, nameLen);
311 int ent = hash & (mHashTableSize-1);
312
313 while (mHashTable[ent].name != NULL) {
314 if (mHashTable[ent].nameLen == nameLen &&
315 memcmp(mHashTable[ent].name, fileName, nameLen) == 0)
316 {
317 /* match */
318 return (ZipEntryRO) (ent + kZipEntryAdj);
319 }
320
321 ent = (ent + 1) & (mHashTableSize-1);
322 }
323
324 return NULL;
325}
326
327/*
328 * Find the Nth entry.
329 *
330 * This currently involves walking through the sparse hash table, counting
331 * non-empty entries. If we need to speed this up we can either allocate
332 * a parallel lookup table or (perhaps better) provide an iterator interface.
333 */
334ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const
335{
336 if (idx < 0 || idx >= mNumEntries) {
337 LOGW("Invalid index %d\n", idx);
338 return NULL;
339 }
340
341 for (int ent = 0; ent < mHashTableSize; ent++) {
342 if (mHashTable[ent].name != NULL) {
343 if (idx-- == 0)
344 return (ZipEntryRO) (ent + kZipEntryAdj);
345 }
346 }
347
348 return NULL;
349}
350
351/*
352 * Get the useful fields from the zip entry.
353 *
354 * Returns "false" if the offsets to the fields or the contents of the fields
355 * appear to be bogus.
356 */
357bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, long* pUncompLen,
358 long* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const
359{
360 int ent = entryToIndex(entry);
361 if (ent < 0)
362 return false;
363
364 /*
365 * Recover the start of the central directory entry from the filename
366 * pointer.
367 */
368 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr();
369 const unsigned char* ptr = (const unsigned char*) mHashTable[ent].name;
370 size_t zipLength = mFileMap->getDataLength();
371
372 ptr -= kCDELen;
373
374 int method = get2LE(ptr + kCDEMethod);
375 if (pMethod != NULL)
376 *pMethod = method;
377
378 if (pModWhen != NULL)
379 *pModWhen = get4LE(ptr + kCDEModWhen);
380 if (pCrc32 != NULL)
381 *pCrc32 = get4LE(ptr + kCDECRC);
382
383 /*
384 * We need to make sure that the lengths are not so large that somebody
385 * trying to map the compressed or uncompressed data runs off the end
386 * of the mapped region.
387 */
388 unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset);
389 if (localHdrOffset + kLFHLen >= zipLength) {
390 LOGE("ERROR: bad local hdr offset in zip\n");
391 return false;
392 }
393 const unsigned char* localHdr = basePtr + localHdrOffset;
394 off_t dataOffset = localHdrOffset + kLFHLen
395 + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen);
396 if ((unsigned long) dataOffset >= zipLength) {
397 LOGE("ERROR: bad data offset in zip\n");
398 return false;
399 }
400
401 if (pCompLen != NULL) {
402 *pCompLen = get4LE(ptr + kCDECompLen);
403 if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) {
404 LOGE("ERROR: bad compressed length in zip\n");
405 return false;
406 }
407 }
408 if (pUncompLen != NULL) {
409 *pUncompLen = get4LE(ptr + kCDEUncompLen);
410 if (*pUncompLen < 0) {
411 LOGE("ERROR: negative uncompressed length in zip\n");
412 return false;
413 }
414 if (method == kCompressStored &&
415 (size_t)(dataOffset + *pUncompLen) >= zipLength)
416 {
417 LOGE("ERROR: bad uncompressed length in zip\n");
418 return false;
419 }
420 }
421
422 if (pOffset != NULL) {
423 *pOffset = dataOffset;
424 }
425 return true;
426}
427
428/*
429 * Copy the entry's filename to the buffer.
430 */
431int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen)
432 const
433{
434 int ent = entryToIndex(entry);
435 if (ent < 0)
436 return -1;
437
438 int nameLen = mHashTable[ent].nameLen;
439 if (bufLen < nameLen+1)
440 return nameLen+1;
441
442 memcpy(buffer, mHashTable[ent].name, nameLen);
443 buffer[nameLen] = '\0';
444 return 0;
445}
446
447/*
448 * Create a new FileMap object that spans the data in "entry".
449 */
450FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const
451{
452 /*
453 * TODO: the efficient way to do this is to modify FileMap to allow
454 * sub-regions of a file to be mapped. A reference-counting scheme
455 * can manage the base memory mapping. For now, we just create a brand
456 * new mapping off of the Zip archive file descriptor.
457 */
458
459 FileMap* newMap;
460 long compLen;
461 off_t offset;
462
463 if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL))
464 return NULL;
465
466 newMap = new FileMap();
467 if (!newMap->create(mFileMap->getFileName(), mFd, offset, compLen, true)) {
468 newMap->release();
469 return NULL;
470 }
471
472 return newMap;
473}
474
475/*
476 * Uncompress an entry, in its entirety, into the provided output buffer.
477 *
478 * This doesn't verify the data's CRC, which might be useful for
479 * uncompressed data. The caller should be able to manage it.
480 */
481bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const
482{
483 const int kSequentialMin = 32768;
484 bool result = false;
485 int ent = entryToIndex(entry);
486 if (ent < 0)
487 return -1;
488
489 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr();
490 int method;
491 long uncompLen, compLen;
492 off_t offset;
493
494 getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
495
496 /*
497 * Experiment with madvise hint. When we want to uncompress a file,
498 * we pull some stuff out of the central dir entry and then hit a
499 * bunch of compressed or uncompressed data sequentially. The CDE
500 * visit will cause a limited amount of read-ahead because it's at
501 * the end of the file. We could end up doing lots of extra disk
502 * access if the file we're prying open is small. Bottom line is we
503 * probably don't want to turn MADV_SEQUENTIAL on and leave it on.
504 *
505 * So, if the compressed size of the file is above a certain minimum
506 * size, temporarily boost the read-ahead in the hope that the extra
507 * pair of system calls are negated by a reduction in page faults.
508 */
509 if (compLen > kSequentialMin)
510 mFileMap->advise(FileMap::SEQUENTIAL);
511
512 if (method == kCompressStored) {
513 memcpy(buffer, basePtr + offset, uncompLen);
514 } else {
515 if (!inflateBuffer(buffer, basePtr + offset, uncompLen, compLen))
516 goto bail;
517 }
518
519 if (compLen > kSequentialMin)
520 mFileMap->advise(FileMap::NORMAL);
521
522 result = true;
523
524bail:
525 return result;
526}
527
528/*
529 * Uncompress an entry, in its entirety, to an open file descriptor.
530 *
531 * This doesn't verify the data's CRC, but probably should.
532 */
533bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const
534{
535 bool result = false;
536 int ent = entryToIndex(entry);
537 if (ent < 0)
538 return -1;
539
540 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr();
541 int method;
542 long uncompLen, compLen;
543 off_t offset;
544
545 getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
546
547 if (method == kCompressStored) {
548 ssize_t actual;
549
550 actual = write(fd, basePtr + offset, uncompLen);
551 if (actual < 0) {
552 LOGE("Write failed: %s\n", strerror(errno));
553 goto bail;
554 } else if (actual != uncompLen) {
555 LOGE("Partial write during uncompress (%d of %ld)\n",
556 (int)actual, uncompLen);
557 goto bail;
558 } else {
559 LOGI("+++ successful write\n");
560 }
561 } else {
562 if (!inflateBuffer(fd, basePtr+offset, uncompLen, compLen))
563 goto bail;
564 }
565
566 result = true;
567
568bail:
569 return result;
570}
571
572/*
573 * Uncompress "deflate" data from one buffer to another.
574 */
575/*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf,
576 long uncompLen, long compLen)
577{
578 bool result = false;
579 z_stream zstream;
580 int zerr;
581
582 /*
583 * Initialize the zlib stream struct.
584 */
585 memset(&zstream, 0, sizeof(zstream));
586 zstream.zalloc = Z_NULL;
587 zstream.zfree = Z_NULL;
588 zstream.opaque = Z_NULL;
589 zstream.next_in = (Bytef*)inBuf;
590 zstream.avail_in = compLen;
591 zstream.next_out = (Bytef*) outBuf;
592 zstream.avail_out = uncompLen;
593 zstream.data_type = Z_UNKNOWN;
594
595 /*
596 * Use the undocumented "negative window bits" feature to tell zlib
597 * that there's no zlib header waiting for it.
598 */
599 zerr = inflateInit2(&zstream, -MAX_WBITS);
600 if (zerr != Z_OK) {
601 if (zerr == Z_VERSION_ERROR) {
602 LOGE("Installed zlib is not compatible with linked version (%s)\n",
603 ZLIB_VERSION);
604 } else {
605 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
606 }
607 goto bail;
608 }
609
610 /*
611 * Expand data.
612 */
613 zerr = inflate(&zstream, Z_FINISH);
614 if (zerr != Z_STREAM_END) {
615 LOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
616 zerr, zstream.next_in, zstream.avail_in,
617 zstream.next_out, zstream.avail_out);
618 goto z_bail;
619 }
620
621 /* paranoia */
622 if ((long) zstream.total_out != uncompLen) {
623 LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
624 zstream.total_out, uncompLen);
625 goto z_bail;
626 }
627
628 result = true;
629
630z_bail:
631 inflateEnd(&zstream); /* free up any allocated structures */
632
633bail:
634 return result;
635}
636
637/*
638 * Uncompress "deflate" data from one buffer to an open file descriptor.
639 */
640/*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf,
641 long uncompLen, long compLen)
642{
643 bool result = false;
644 const int kWriteBufSize = 32768;
645 unsigned char writeBuf[kWriteBufSize];
646 z_stream zstream;
647 int zerr;
648
649 /*
650 * Initialize the zlib stream struct.
651 */
652 memset(&zstream, 0, sizeof(zstream));
653 zstream.zalloc = Z_NULL;
654 zstream.zfree = Z_NULL;
655 zstream.opaque = Z_NULL;
656 zstream.next_in = (Bytef*)inBuf;
657 zstream.avail_in = compLen;
658 zstream.next_out = (Bytef*) writeBuf;
659 zstream.avail_out = sizeof(writeBuf);
660 zstream.data_type = Z_UNKNOWN;
661
662 /*
663 * Use the undocumented "negative window bits" feature to tell zlib
664 * that there's no zlib header waiting for it.
665 */
666 zerr = inflateInit2(&zstream, -MAX_WBITS);
667 if (zerr != Z_OK) {
668 if (zerr == Z_VERSION_ERROR) {
669 LOGE("Installed zlib is not compatible with linked version (%s)\n",
670 ZLIB_VERSION);
671 } else {
672 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
673 }
674 goto bail;
675 }
676
677 /*
678 * Loop while we have more to do.
679 */
680 do {
681 /*
682 * Expand data.
683 */
684 zerr = inflate(&zstream, Z_NO_FLUSH);
685 if (zerr != Z_OK && zerr != Z_STREAM_END) {
686 LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
687 zerr, zstream.next_in, zstream.avail_in,
688 zstream.next_out, zstream.avail_out);
689 goto z_bail;
690 }
691
692 /* write when we're full or when we're done */
693 if (zstream.avail_out == 0 ||
694 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
695 {
696 long writeSize = zstream.next_out - writeBuf;
697 int cc = write(fd, writeBuf, writeSize);
698 if (cc != (int) writeSize) {
699 LOGW("write failed in inflate (%d vs %ld)\n", cc, writeSize);
700 goto z_bail;
701 }
702
703 zstream.next_out = writeBuf;
704 zstream.avail_out = sizeof(writeBuf);
705 }
706 } while (zerr == Z_OK);
707
708 assert(zerr == Z_STREAM_END); /* other errors should've been caught */
709
710 /* paranoia */
711 if ((long) zstream.total_out != uncompLen) {
712 LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
713 zstream.total_out, uncompLen);
714 goto z_bail;
715 }
716
717 result = true;
718
719z_bail:
720 inflateEnd(&zstream); /* free up any allocated structures */
721
722bail:
723 return result;
724}