| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /* |
| * Read-only access to Zip archives, with minimal heap allocation. |
| */ |
| #include "ZipArchive.h" |
| |
| #include <zlib.h> |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| |
| |
/*
 * Zip file constants.
 *
 * Values named k*Signature are the 4-byte record signatures; the other
 * values are byte lengths of, or byte offsets into, the record they are
 * named after (EOCD = end of central directory, LFH = local file header,
 * CDE = central directory entry).
 */

/* End-of-central-directory record. */
enum {
    kEOCDSignature  = 0x06054b50,
    kEOCDLen        = 22,
    kEOCDNumEntries = 8,        /* offset to #of entries in file */
    kEOCDFileOffset = 16,       /* offset to central directory */

    kMaxCommentLen  = 65535,    /* longest possible in ushort */
    kMaxEOCDSearch  = kMaxCommentLen + kEOCDLen
};

/* Local file header. */
enum {
    kLFHSignature   = 0x04034b50,
    kLFHLen         = 30,       /* excluding variable-len fields */
    kLFHNameLen     = 26,       /* offset to filename length */
    kLFHExtraLen    = 28        /* offset to extra length */
};

/* Central directory entry. */
enum {
    kCDESignature   = 0x02014b50,
    kCDELen         = 46,       /* excluding variable-len fields */
    kCDEMethod      = 10,       /* offset to compression method */
    kCDEModWhen     = 12,       /* offset to modification timestamp */
    kCDECRC         = 16,       /* offset to entry CRC */
    kCDECompLen     = 20,       /* offset to compressed length */
    kCDEUncompLen   = 24,       /* offset to uncompressed length */
    kCDENameLen     = 28,       /* offset to filename length */
    kCDEExtraLen    = 30,       /* offset to extra length */
    kCDECommentLen  = 32,       /* offset to comment length */
    kCDELocalOffset = 42        /* offset to local hdr */
};
| |
/*
 * Bias added to a hash table index to form a ZipEntry value.
 *
 * ZipEntry uses 0 as its invalid value, so hash table indices are shifted
 * by a fixed amount before being handed out.  Using a large bias helps
 * ensure that people don't mix & match arguments, e.g. by passing an
 * entry index where a ZipEntry is expected.
 */
enum { kZipEntryAdj = 10000 };
| |
| /* |
| * Convert a ZipEntry to a hash table index, verifying that it's in a |
| * valid range. |
| */ |
| static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry) |
| { |
| long ent = ((long) entry) - kZipEntryAdj; |
| if (ent < 0 || ent >= pArchive->mHashTableSize || |
| pArchive->mHashTable[ent].name == NULL) |
| { |
| LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent); |
| return -1; |
| } |
| return ent; |
| } |
| |
/*
 * Hash "len" bytes starting at "str"; the data need not be
 * null-terminated.
 *
 * The result is the usual multiply-by-31 polynomial, accumulated in an
 * unsigned int (so it wraps rather than overflowing).  An empty string
 * hashes to 0.
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int acc = 0;
    const char* cursor = str;

    for (; len != 0; len--) {
        acc = acc * 31 + *cursor;
        cursor++;
    }

    return acc;
}
| |
| /* |
| * Add a new entry to the hash table. |
| */ |
| static void addToHash(ZipArchive* pArchive, const char* str, int strLen, |
| unsigned int hash) |
| { |
| const int hashTableSize = pArchive->mHashTableSize; |
| int ent = hash & (hashTableSize - 1); |
| |
| /* |
| * We over-allocated the table, so we're guaranteed to find an empty slot. |
| */ |
| while (pArchive->mHashTable[ent].name != NULL) |
| ent = (ent + 1) & (hashTableSize-1); |
| |
| pArchive->mHashTable[ent].name = str; |
| pArchive->mHashTable[ent].nameLen = strLen; |
| } |
| |
| /* |
| * Get 2 little-endian bytes. |
| */ |
| static u2 get2LE(unsigned char const* pSrc) |
| { |
| return pSrc[0] | (pSrc[1] << 8); |
| } |
| |
| /* |
| * Get 4 little-endian bytes. |
| */ |
| static u4 get4LE(unsigned char const* pSrc) |
| { |
| u4 result; |
| |
| result = pSrc[0]; |
| result |= pSrc[1] << 8; |
| result |= pSrc[2] << 16; |
| result |= pSrc[3] << 24; |
| |
| return result; |
| } |
| |
| /* |
| * Parse the Zip archive, verifying its contents and initializing internal |
| * data structures. |
| */ |
| static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) |
| { |
| #define CHECK_OFFSET(_off) { \ |
| if ((unsigned int) (_off) >= maxOffset) { \ |
| LOGE("ERROR: bad offset %u (max %d): %s\n", \ |
| (unsigned int) (_off), maxOffset, #_off); \ |
| goto bail; \ |
| } \ |
| } |
| bool result = false; |
| const unsigned char* basePtr = (const unsigned char*)pMap->addr; |
| const unsigned char* ptr; |
| size_t length = pMap->length; |
| unsigned int i, numEntries, cdOffset; |
| unsigned int val; |
| |
| /* |
| * The first 4 bytes of the file will either be the local header |
| * signature for the first file (kLFHSignature) or, if the archive doesn't |
| * have any files in it, the end-of-central-directory signature |
| * (kEOCDSignature). |
| */ |
| val = get4LE(basePtr); |
| if (val == kEOCDSignature) { |
| LOGI("Found Zip archive, but it looks empty\n"); |
| goto bail; |
| } else if (val != kLFHSignature) { |
| LOGV("Not a Zip archive (found 0x%08x)\n", val); |
| goto bail; |
| } |
| |
| /* |
| * Find the EOCD. We'll find it immediately unless they have a file |
| * comment. |
| */ |
| ptr = basePtr + length - kEOCDLen; |
| |
| while (ptr >= basePtr) { |
| if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) |
| break; |
| ptr--; |
| } |
| if (ptr < basePtr) { |
| LOGI("Could not find end-of-central-directory in Zip\n"); |
| goto bail; |
| } |
| |
| /* |
| * There are two interesting items in the EOCD block: the number of |
| * entries in the file, and the file offset of the start of the |
| * central directory. |
| * |
| * (There's actually a count of the #of entries in this file, and for |
| * all files which comprise a spanned archive, but for our purposes |
| * we're only interested in the current file. Besides, we expect the |
| * two to be equivalent for our stuff.) |
| */ |
| numEntries = get2LE(ptr + kEOCDNumEntries); |
| cdOffset = get4LE(ptr + kEOCDFileOffset); |
| |
| /* valid offsets are [0,EOCD] */ |
| unsigned int maxOffset; |
| maxOffset = (ptr - basePtr) +1; |
| |
| LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); |
| if (numEntries == 0 || cdOffset >= length) { |
| LOGW("Invalid entries=%d offset=%d (len=%zd)\n", |
| numEntries, cdOffset, length); |
| goto bail; |
| } |
| |
| /* |
| * Create hash table. We have a minimum 75% load factor, possibly as |
| * low as 50% after we round off to a power of 2. There must be at |
| * least one unused entry to avoid an infinite loop during creation. |
| */ |
| pArchive->mNumEntries = numEntries; |
| pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3); |
| pArchive->mHashTable = (ZipHashEntry*) |
| calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry)); |
| |
| /* |
| * Walk through the central directory, adding entries to the hash |
| * table. |
| */ |
| ptr = basePtr + cdOffset; |
| for (i = 0; i < numEntries; i++) { |
| unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; |
| const unsigned char* localHdr; |
| unsigned int hash; |
| |
| if (get4LE(ptr) != kCDESignature) { |
| LOGW("Missed a central dir sig (at %d)\n", i); |
| goto bail; |
| } |
| if (ptr + kCDELen > basePtr + length) { |
| LOGW("Ran off the end (at %d)\n", i); |
| goto bail; |
| } |
| |
| localHdrOffset = get4LE(ptr + kCDELocalOffset); |
| CHECK_OFFSET(localHdrOffset); |
| fileNameLen = get2LE(ptr + kCDENameLen); |
| extraLen = get2LE(ptr + kCDEExtraLen); |
| commentLen = get2LE(ptr + kCDECommentLen); |
| |
| //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", |
| // i, localHdrOffset, fileNameLen, extraLen, commentLen); |
| //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); |
| |
| /* add the CDE filename to the hash table */ |
| hash = computeHash((const char*)ptr + kCDELen, fileNameLen); |
| addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash); |
| |
| localHdr = basePtr + localHdrOffset; |
| if (get4LE(localHdr) != kLFHSignature) { |
| LOGW("Bad offset to local header: %d (at %d)\n", |
| localHdrOffset, i); |
| goto bail; |
| } |
| |
| ptr += kCDELen + fileNameLen + extraLen + commentLen; |
| CHECK_OFFSET(ptr - basePtr); |
| } |
| |
| result = true; |
| |
| bail: |
| return result; |
| #undef CHECK_OFFSET |
| } |
| |
| /* |
| * Open the specified file read-only. We memory-map the entire thing and |
| * parse the contents. |
| * |
| * This will be called on non-Zip files, especially during VM startup, so |
| * we don't want to be too noisy about certain types of failure. (Do |
| * we want a "quiet" flag?) |
| * |
| * On success, we fill out the contents of "pArchive" and return 0. |
| */ |
| int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive) |
| { |
| int fd, err; |
| |
| LOGV("Opening archive '%s' %p\n", fileName, pArchive); |
| |
| memset(pArchive, 0, sizeof(ZipArchive)); |
| |
| fd = open(fileName, O_RDONLY, 0); |
| if (fd < 0) { |
| err = errno ? errno : -1; |
| LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); |
| return err; |
| } |
| |
| return dexZipPrepArchive(fd, fileName, pArchive); |
| } |
| |
| /* |
| * Prepare to access a ZipArchive in an open file descriptor. |
| */ |
| int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive) |
| { |
| MemMapping map; |
| int err; |
| |
| map.addr = NULL; |
| memset(pArchive, 0, sizeof(*pArchive)); |
| |
| pArchive->mFd = fd; |
| |
| if (sysMapFileInShmem(pArchive->mFd, &map) != 0) { |
| err = -1; |
| LOGW("Map of '%s' failed\n", debugFileName); |
| goto bail; |
| } |
| |
| if (map.length < kEOCDLen) { |
| err = -1; |
| LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length); |
| goto bail; |
| } |
| |
| if (!parseZipArchive(pArchive, &map)) { |
| err = -1; |
| LOGV("Parsing '%s' failed\n", debugFileName); |
| goto bail; |
| } |
| |
| /* success */ |
| err = 0; |
| sysCopyMap(&pArchive->mMap, &map); |
| map.addr = NULL; |
| |
| bail: |
| if (err != 0) |
| dexZipCloseArchive(pArchive); |
| if (map.addr != NULL) |
| sysReleaseShmem(&map); |
| return err; |
| } |
| |
| |
| /* |
| * Close a ZipArchive, closing the file and freeing the contents. |
| * |
| * NOTE: the ZipArchive may not have been fully created. |
| */ |
| void dexZipCloseArchive(ZipArchive* pArchive) |
| { |
| LOGV("Closing archive %p\n", pArchive); |
| |
| if (pArchive->mFd >= 0) |
| close(pArchive->mFd); |
| |
| sysReleaseShmem(&pArchive->mMap); |
| |
| free(pArchive->mHashTable); |
| |
| pArchive->mFd = -1; |
| pArchive->mNumEntries = -1; |
| pArchive->mHashTableSize = -1; |
| pArchive->mHashTable = NULL; |
| } |
| |
| |
| /* |
| * Find a matching entry. |
| * |
| * Returns 0 if not found. |
| */ |
| ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName) |
| { |
| int nameLen = strlen(entryName); |
| unsigned int hash = computeHash(entryName, nameLen); |
| const int hashTableSize = pArchive->mHashTableSize; |
| int ent = hash & (hashTableSize-1); |
| |
| while (pArchive->mHashTable[ent].name != NULL) { |
| if (pArchive->mHashTable[ent].nameLen == nameLen && |
| memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0) |
| { |
| /* match */ |
| return (ZipEntry) (ent + kZipEntryAdj); |
| } |
| |
| ent = (ent + 1) & (hashTableSize-1); |
| } |
| |
| return NULL; |
| } |
| |
#if 0
/*
 * Find the Nth entry.  (Currently compiled out.)
 *
 * This walks the sparse hash table and counts occupied slots until the
 * requested one is reached.  If this needs to be faster we can either
 * allocate a parallel lookup table or (perhaps better) provide an
 * iterator interface.
 *
 * Returns NULL if "idx" is out of range.
 */
ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
{
    if (idx < 0 || idx >= pArchive->mNumEntries) {
        LOGW("Invalid index %d\n", idx);
        return NULL;
    }

    int remaining = idx;
    int slot = 0;
    while (slot < pArchive->mHashTableSize) {
        /* only occupied slots count toward the index */
        if (pArchive->mHashTable[slot].name != NULL && remaining-- == 0)
            return (ZipEntry) (slot + kZipEntryAdj);
        slot++;
    }

    return NULL;
}
#endif
| |
| /* |
| * Get the useful fields from the zip entry. |
| * |
| * Returns "false" if the offsets to the fields or the contents of the fields |
| * appear to be bogus. |
| */ |
| bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry, |
| int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset, |
| long* pModWhen, long* pCrc32) |
| { |
| int ent = entryToIndex(pArchive, entry); |
| if (ent < 0) |
| return false; |
| |
| /* |
| * Recover the start of the central directory entry from the filename |
| * pointer. |
| */ |
| const unsigned char* basePtr = (const unsigned char*) |
| pArchive->mMap.addr; |
| const unsigned char* ptr = (const unsigned char*) |
| pArchive->mHashTable[ent].name; |
| size_t zipLength = |
| pArchive->mMap.length; |
| |
| ptr -= kCDELen; |
| |
| int method = get2LE(ptr + kCDEMethod); |
| if (pMethod != NULL) |
| *pMethod = method; |
| |
| if (pModWhen != NULL) |
| *pModWhen = get4LE(ptr + kCDEModWhen); |
| if (pCrc32 != NULL) |
| *pCrc32 = get4LE(ptr + kCDECRC); |
| |
| /* |
| * We need to make sure that the lengths are not so large that somebody |
| * trying to map the compressed or uncompressed data runs off the end |
| * of the mapped region. |
| */ |
| unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); |
| if (localHdrOffset + kLFHLen >= zipLength) { |
| LOGE("ERROR: bad local hdr offset in zip\n"); |
| return false; |
| } |
| const unsigned char* localHdr = basePtr + localHdrOffset; |
| off_t dataOffset = localHdrOffset + kLFHLen |
| + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); |
| if ((unsigned long) dataOffset >= zipLength) { |
| LOGE("ERROR: bad data offset in zip\n"); |
| return false; |
| } |
| |
| if (pCompLen != NULL) { |
| *pCompLen = get4LE(ptr + kCDECompLen); |
| if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { |
| LOGE("ERROR: bad compressed length in zip\n"); |
| return false; |
| } |
| } |
| if (pUncompLen != NULL) { |
| *pUncompLen = get4LE(ptr + kCDEUncompLen); |
| if (*pUncompLen < 0) { |
| LOGE("ERROR: negative uncompressed length in zip\n"); |
| return false; |
| } |
| if (method == kCompressStored && |
| (size_t)(dataOffset + *pUncompLen) >= zipLength) |
| { |
| LOGE("ERROR: bad uncompressed length in zip\n"); |
| return false; |
| } |
| } |
| |
| if (pOffset != NULL) { |
| *pOffset = dataOffset; |
| } |
| return true; |
| } |
| |
| /* |
| * Uncompress "deflate" data from one buffer to an open file descriptor. |
| */ |
| static bool inflateToFile(int fd, const void* inBuf, long uncompLen, |
| long compLen) |
| { |
| bool result = false; |
| const int kWriteBufSize = 32768; |
| unsigned char writeBuf[kWriteBufSize]; |
| z_stream zstream; |
| int zerr; |
| |
| /* |
| * Initialize the zlib stream struct. |
| */ |
| memset(&zstream, 0, sizeof(zstream)); |
| zstream.zalloc = Z_NULL; |
| zstream.zfree = Z_NULL; |
| zstream.opaque = Z_NULL; |
| zstream.next_in = (Bytef*)inBuf; |
| zstream.avail_in = compLen; |
| zstream.next_out = (Bytef*) writeBuf; |
| zstream.avail_out = sizeof(writeBuf); |
| zstream.data_type = Z_UNKNOWN; |
| |
| /* |
| * Use the undocumented "negative window bits" feature to tell zlib |
| * that there's no zlib header waiting for it. |
| */ |
| zerr = inflateInit2(&zstream, -MAX_WBITS); |
| if (zerr != Z_OK) { |
| if (zerr == Z_VERSION_ERROR) { |
| LOGE("Installed zlib is not compatible with linked version (%s)\n", |
| ZLIB_VERSION); |
| } else { |
| LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); |
| } |
| goto bail; |
| } |
| |
| /* |
| * Loop while we have more to do. |
| */ |
| do { |
| /* |
| * Expand data. |
| */ |
| zerr = inflate(&zstream, Z_NO_FLUSH); |
| if (zerr != Z_OK && zerr != Z_STREAM_END) { |
| LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", |
| zerr, zstream.next_in, zstream.avail_in, |
| zstream.next_out, zstream.avail_out); |
| goto z_bail; |
| } |
| |
| /* write when we're full or when we're done */ |
| if (zstream.avail_out == 0 || |
| (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) |
| { |
| long writeSize = zstream.next_out - writeBuf; |
| int cc = write(fd, writeBuf, writeSize); |
| if (cc != (int) writeSize) { |
| if (cc < 0) { |
| LOGW("write failed in inflate: %s\n", strerror(errno)); |
| } else { |
| LOGW("partial write in inflate (%d vs %ld)\n", |
| cc, writeSize); |
| } |
| goto z_bail; |
| } |
| |
| zstream.next_out = writeBuf; |
| zstream.avail_out = sizeof(writeBuf); |
| } |
| } while (zerr == Z_OK); |
| |
| assert(zerr == Z_STREAM_END); /* other errors should've been caught */ |
| |
| /* paranoia */ |
| if ((long) zstream.total_out != uncompLen) { |
| LOGW("Size mismatch on inflated file (%ld vs %ld)\n", |
| zstream.total_out, uncompLen); |
| goto z_bail; |
| } |
| |
| result = true; |
| |
| z_bail: |
| inflateEnd(&zstream); /* free up any allocated structures */ |
| |
| bail: |
| return result; |
| } |
| |
| /* |
| * Uncompress an entry, in its entirety, to an open file descriptor. |
| * |
| * TODO: this doesn't verify the data's CRC, but probably should (especially |
| * for uncompressed data). |
| */ |
| bool dexZipExtractEntryToFile(const ZipArchive* pArchive, |
| const ZipEntry entry, int fd) |
| { |
| bool result = false; |
| int ent = entryToIndex(pArchive, entry); |
| if (ent < 0) |
| return -1; |
| |
| const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr; |
| int method; |
| long uncompLen, compLen; |
| off_t offset; |
| |
| if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen, |
| &offset, NULL, NULL)) |
| { |
| goto bail; |
| } |
| |
| if (method == kCompressStored) { |
| ssize_t actual; |
| |
| actual = write(fd, basePtr + offset, uncompLen); |
| if (actual < 0) { |
| LOGE("Write failed: %s\n", strerror(errno)); |
| goto bail; |
| } else if (actual != uncompLen) { |
| LOGE("Partial write during uncompress (%d of %ld)\n", |
| (int) actual, uncompLen); |
| goto bail; |
| } else { |
| LOGI("+++ successful write\n"); |
| } |
| } else { |
| if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen)) |
| goto bail; |
| } |
| |
| result = true; |
| |
| bail: |
| return result; |
| } |
| |