| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * Access the contents of a .dex file. |
| */ |
| |
| #include "DexFile.h" |
| #include "DexOptData.h" |
| #include "DexProto.h" |
| #include "DexCatch.h" |
| #include "Leb128.h" |
| #include "sha1.h" |
| #include "ZipArchive.h" |
| |
| #include <zlib.h> |
| |
| #include <stdlib.h> |
| #include <stddef.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| |
| |
| /* |
| * Verifying checksums is good, but it slows things down and causes us to |
| * touch every page. In the "optimized" world, it doesn't work at all, |
| * because we rewrite the contents. |
| */ |
| static const bool kVerifyChecksum = false; |
| static const bool kVerifySignature = false; |
| |
| /* (documented in header) */ |
| char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) { |
| const char* string = dexGetPrimitiveTypeDescriptor(type); |
| |
| return (string == NULL) ? '\0' : string[0]; |
| } |
| |
| /* (documented in header) */ |
| const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) { |
| switch (type) { |
| case PRIM_VOID: return "V"; |
| case PRIM_BOOLEAN: return "Z"; |
| case PRIM_BYTE: return "B"; |
| case PRIM_SHORT: return "S"; |
| case PRIM_CHAR: return "C"; |
| case PRIM_INT: return "I"; |
| case PRIM_LONG: return "J"; |
| case PRIM_FLOAT: return "F"; |
| case PRIM_DOUBLE: return "D"; |
| default: return NULL; |
| } |
| |
| return NULL; |
| } |
| |
| /* (documented in header) */ |
| const char* dexGetBoxedTypeDescriptor(PrimitiveType type) { |
| switch (type) { |
| case PRIM_VOID: return NULL; |
| case PRIM_BOOLEAN: return "Ljava/lang/Boolean;"; |
| case PRIM_BYTE: return "Ljava/lang/Byte;"; |
| case PRIM_SHORT: return "Ljava/lang/Short;"; |
| case PRIM_CHAR: return "Ljava/lang/Character;"; |
| case PRIM_INT: return "Ljava/lang/Integer;"; |
| case PRIM_LONG: return "Ljava/lang/Long;"; |
| case PRIM_FLOAT: return "Ljava/lang/Float;"; |
| case PRIM_DOUBLE: return "Ljava/lang/Double;"; |
| default: return NULL; |
| } |
| } |
| |
| /* (documented in header) */ |
| PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) { |
| switch (descriptorChar) { |
| case 'V': return PRIM_VOID; |
| case 'Z': return PRIM_BOOLEAN; |
| case 'B': return PRIM_BYTE; |
| case 'S': return PRIM_SHORT; |
| case 'C': return PRIM_CHAR; |
| case 'I': return PRIM_INT; |
| case 'J': return PRIM_LONG; |
| case 'F': return PRIM_FLOAT; |
| case 'D': return PRIM_DOUBLE; |
| default: return PRIM_NOT; |
| } |
| } |
| |
| /* Return the UTF-8 encoded string with the specified string_id index, |
| * also filling in the UTF-16 size (number of 16-bit code points).*/ |
| const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, |
| u4* utf16Size) { |
| const DexStringId* pStringId = dexGetStringId(pDexFile, idx); |
| const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; |
| |
| *utf16Size = readUnsignedLeb128(&ptr); |
| return (const char*) ptr; |
| } |
| |
| /* |
| * Format an SHA-1 digest for printing. tmpBuf must be able to hold at |
| * least kSHA1DigestOutputLen bytes. |
| */ |
| const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); |
| |
| /* |
| * Compute a SHA-1 digest on a range of bytes. |
| */ |
| static void dexComputeSHA1Digest(const unsigned char* data, size_t length, |
| unsigned char digest[]) |
| { |
| SHA1_CTX context; |
| SHA1Init(&context); |
| SHA1Update(&context, data, length); |
| SHA1Final(digest, &context); |
| } |
| |
| /* |
| * Format the SHA-1 digest into the buffer, which must be able to hold at |
| * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, |
| */ |
| static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) |
| { |
| static const char hexDigit[] = "0123456789abcdef"; |
| char* cp; |
| int i; |
| |
| cp = tmpBuf; |
| for (i = 0; i < kSHA1DigestLen; i++) { |
| *cp++ = hexDigit[digest[i] >> 4]; |
| *cp++ = hexDigit[digest[i] & 0x0f]; |
| } |
| *cp++ = '\0'; |
| |
| assert(cp == tmpBuf + kSHA1DigestOutputLen); |
| |
| return tmpBuf; |
| } |
| |
| /* |
| * Compute a hash code on a UTF-8 string, for use with internal hash tables. |
| * |
| * This may or may not be compatible with UTF-8 hash functions used inside |
| * the Dalvik VM. |
| * |
| * The basic "multiply by 31 and add" approach does better on class names |
| * than most other things tried (e.g. adler32). |
| */ |
| static u4 classDescriptorHash(const char* str) |
| { |
| u4 hash = 1; |
| |
| while (*str != '\0') |
| hash = hash * 31 + *str++; |
| |
| return hash; |
| } |
| |
| /* |
| * Add an entry to the class lookup table. We hash the string and probe |
| * until we find an open slot. |
| */ |
| static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, |
| int stringOff, int classDefOff, int* pNumProbes) |
| { |
| const char* classDescriptor = |
| (const char*) (pDexFile->baseAddr + stringOff); |
| const DexClassDef* pClassDef = |
| (const DexClassDef*) (pDexFile->baseAddr + classDefOff); |
| u4 hash = classDescriptorHash(classDescriptor); |
| int mask = pLookup->numEntries-1; |
| int idx = hash & mask; |
| |
| /* |
| * Find the first empty slot. We oversized the table, so this is |
| * guaranteed to finish. |
| */ |
| int probes = 0; |
| while (pLookup->table[idx].classDescriptorOffset != 0) { |
| idx = (idx + 1) & mask; |
| probes++; |
| } |
| //if (probes > 1) |
| // LOGW("classLookupAdd: probes=%d\n", probes); |
| |
| pLookup->table[idx].classDescriptorHash = hash; |
| pLookup->table[idx].classDescriptorOffset = stringOff; |
| pLookup->table[idx].classDefOffset = classDefOff; |
| *pNumProbes = probes; |
| } |
| |
| /* |
| * Create the class lookup hash table. |
| * |
| * Returns newly-allocated storage. |
| */ |
| DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) |
| { |
| DexClassLookup* pLookup; |
| int allocSize; |
| int i, numEntries; |
| int numProbes, totalProbes, maxProbes; |
| |
| numProbes = totalProbes = maxProbes = 0; |
| |
| assert(pDexFile != NULL); |
| |
| /* |
| * Using a factor of 3 results in far less probing than a factor of 2, |
| * but almost doubles the flash storage requirements for the bootstrap |
| * DEX files. The overall impact on class loading performance seems |
| * to be minor. We could probably get some performance improvement by |
| * using a secondary hash. |
| */ |
| numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); |
| allocSize = offsetof(DexClassLookup, table) |
| + numEntries * sizeof(pLookup->table[0]); |
| |
| pLookup = (DexClassLookup*) calloc(1, allocSize); |
| if (pLookup == NULL) |
| return NULL; |
| pLookup->size = allocSize; |
| pLookup->numEntries = numEntries; |
| |
| for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { |
| const DexClassDef* pClassDef; |
| const char* pString; |
| |
| pClassDef = dexGetClassDef(pDexFile, i); |
| pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); |
| |
| classLookupAdd(pDexFile, pLookup, |
| (u1*)pString - pDexFile->baseAddr, |
| (u1*)pClassDef - pDexFile->baseAddr, &numProbes); |
| |
| if (numProbes > maxProbes) |
| maxProbes = numProbes; |
| totalProbes += numProbes; |
| } |
| |
| LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" |
| " total=%d max=%d\n", |
| pDexFile->pHeader->classDefsSize, numEntries, |
| (100 * pDexFile->pHeader->classDefsSize) / numEntries, |
| allocSize, totalProbes, maxProbes); |
| |
| return pLookup; |
| } |
| |
| |
| /* |
| * Set up the basic raw data pointers of a DexFile. This function isn't |
| * meant for general use. |
| */ |
| void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { |
| DexHeader *pHeader = (DexHeader*) data; |
| |
| pDexFile->baseAddr = data; |
| pDexFile->pHeader = pHeader; |
| pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); |
| pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); |
| pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); |
| pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); |
| pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); |
| pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); |
| pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); |
| } |
| |
| /* |
| * Parse an optimized or unoptimized .dex file sitting in memory. This is |
| * called after the byte-ordering and structure alignment has been fixed up. |
| * |
| * On success, return a newly-allocated DexFile. |
| */ |
| DexFile* dexFileParse(const u1* data, size_t length, int flags) |
| { |
| DexFile* pDexFile = NULL; |
| const DexHeader* pHeader; |
| const u1* magic; |
| int result = -1; |
| |
| if (length < sizeof(DexHeader)) { |
| LOGE("too short to be a valid .dex\n"); |
| goto bail; /* bad file format */ |
| } |
| |
| pDexFile = (DexFile*) malloc(sizeof(DexFile)); |
| if (pDexFile == NULL) |
| goto bail; /* alloc failure */ |
| memset(pDexFile, 0, sizeof(DexFile)); |
| |
| /* |
| * Peel off the optimized header. |
| */ |
| if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { |
| magic = data; |
| if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { |
| LOGE("bad opt version (0x%02x %02x %02x %02x)\n", |
| magic[4], magic[5], magic[6], magic[7]); |
| goto bail; |
| } |
| |
| pDexFile->pOptHeader = (const DexOptHeader*) data; |
| LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", |
| pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); |
| |
| /* parse the optimized dex file tables */ |
| if (!dexParseOptData(data, length, pDexFile)) |
| goto bail; |
| |
| /* ignore the opt header and appended data from here on out */ |
| data += pDexFile->pOptHeader->dexOffset; |
| length -= pDexFile->pOptHeader->dexOffset; |
| if (pDexFile->pOptHeader->dexLength > length) { |
| LOGE("File truncated? stored len=%d, rem len=%d\n", |
| pDexFile->pOptHeader->dexLength, (int) length); |
| goto bail; |
| } |
| length = pDexFile->pOptHeader->dexLength; |
| } |
| |
| dexFileSetupBasicPointers(pDexFile, data); |
| pHeader = pDexFile->pHeader; |
| |
| magic = pHeader->magic; |
| if (memcmp(magic, DEX_MAGIC, 4) != 0) { |
| /* not expected */ |
| LOGE("bad magic number (0x%02x %02x %02x %02x)\n", |
| magic[0], magic[1], magic[2], magic[3]); |
| goto bail; |
| } |
| if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { |
| LOGE("bad dex version (0x%02x %02x %02x %02x)\n", |
| magic[4], magic[5], magic[6], magic[7]); |
| goto bail; |
| } |
| |
| /* |
| * Verify the checksum(s). This is reasonably quick, but does require |
| * touching every byte in the DEX file. The base checksum changes after |
| * byte-swapping and DEX optimization. |
| */ |
| if (flags & kDexParseVerifyChecksum) { |
| u4 adler = dexComputeChecksum(pHeader); |
| if (adler != pHeader->checksum) { |
| LOGE("ERROR: bad checksum (%08x vs %08x)\n", |
| adler, pHeader->checksum); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } else { |
| LOGV("+++ adler32 checksum (%08x) verified\n", adler); |
| } |
| |
| const DexOptHeader* pOptHeader = pDexFile->pOptHeader; |
| if (pOptHeader != NULL) { |
| adler = dexComputeOptChecksum(pOptHeader); |
| if (adler != pOptHeader->checksum) { |
| LOGE("ERROR: bad opt checksum (%08x vs %08x)\n", |
| adler, pOptHeader->checksum); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } else { |
| LOGV("+++ adler32 opt checksum (%08x) verified\n", adler); |
| } |
| } |
| } |
| |
| /* |
| * Verify the SHA-1 digest. (Normally we don't want to do this -- |
| * the digest is used to uniquely identify the original DEX file, and |
| * can't be computed for verification after the DEX is byte-swapped |
| * and optimized.) |
| */ |
| if (kVerifySignature) { |
| unsigned char sha1Digest[kSHA1DigestLen]; |
| const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + |
| kSHA1DigestLen; |
| |
| dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); |
| if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { |
| char tmpBuf1[kSHA1DigestOutputLen]; |
| char tmpBuf2[kSHA1DigestOutputLen]; |
| LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", |
| dexSHA1DigestToStr(sha1Digest, tmpBuf1), |
| dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } else { |
| LOGV("+++ sha1 digest verified\n"); |
| } |
| } |
| |
| if (pHeader->fileSize != length) { |
| LOGE("ERROR: stored file size (%d) != expected (%d)\n", |
| (int) pHeader->fileSize, (int) length); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } |
| |
| if (pHeader->classDefsSize == 0) { |
| LOGE("ERROR: DEX file has no classes in it, failing\n"); |
| goto bail; |
| } |
| |
| /* |
| * Success! |
| */ |
| result = 0; |
| |
| bail: |
| if (result != 0 && pDexFile != NULL) { |
| dexFileFree(pDexFile); |
| pDexFile = NULL; |
| } |
| return pDexFile; |
| } |
| |
| /* |
| * Free up the DexFile and any associated data structures. |
| * |
| * Note we may be called with a partially-initialized DexFile. |
| */ |
| void dexFileFree(DexFile* pDexFile) |
| { |
| if (pDexFile == NULL) |
| return; |
| |
| free(pDexFile); |
| } |
| |
| /* |
| * Look up a class definition entry by descriptor. |
| * |
| * "descriptor" should look like "Landroid/debug/Stuff;". |
| */ |
| const DexClassDef* dexFindClass(const DexFile* pDexFile, |
| const char* descriptor) |
| { |
| const DexClassLookup* pLookup = pDexFile->pClassLookup; |
| u4 hash; |
| int idx, mask; |
| |
| hash = classDescriptorHash(descriptor); |
| mask = pLookup->numEntries - 1; |
| idx = hash & mask; |
| |
| /* |
| * Search until we find a matching entry or an empty slot. |
| */ |
| while (true) { |
| int offset; |
| |
| offset = pLookup->table[idx].classDescriptorOffset; |
| if (offset == 0) |
| return NULL; |
| |
| if (pLookup->table[idx].classDescriptorHash == hash) { |
| const char* str; |
| |
| str = (const char*) (pDexFile->baseAddr + offset); |
| if (strcmp(str, descriptor) == 0) { |
| return (const DexClassDef*) |
| (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); |
| } |
| } |
| |
| idx = (idx + 1) & mask; |
| } |
| } |
| |
| |
| /* |
| * Compute the DEX file checksum for a memory-mapped DEX file. |
| */ |
| u4 dexComputeChecksum(const DexHeader* pHeader) |
| { |
| const u1* start = (const u1*) pHeader; |
| |
| uLong adler = adler32(0L, Z_NULL, 0); |
| const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); |
| |
| return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); |
| } |
| |
| /* |
| * Compute the size, in bytes, of a DexCode. |
| */ |
| size_t dexGetDexCodeSize(const DexCode* pCode) |
| { |
| /* |
| * The catch handler data is the last entry. It has a variable number |
| * of variable-size pieces, so we need to create an iterator. |
| */ |
| u4 handlersSize; |
| u4 offset; |
| u4 ui; |
| |
| if (pCode->triesSize != 0) { |
| handlersSize = dexGetHandlersSize(pCode); |
| offset = dexGetFirstHandlerOffset(pCode); |
| } else { |
| handlersSize = 0; |
| offset = 0; |
| } |
| |
| for (ui = 0; ui < handlersSize; ui++) { |
| DexCatchIterator iterator; |
| dexCatchIteratorInit(&iterator, pCode, offset); |
| offset = dexCatchIteratorGetEndOffset(&iterator, pCode); |
| } |
| |
| const u1* handlerData = dexGetCatchHandlerData(pCode); |
| |
| //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", |
| // pCode, handlerData, offset); |
| |
| /* return the size of the catch handler + everything before it */ |
| return (handlerData - (u1*) pCode) + offset; |
| } |
| |
| /* |
| * Round up to the next highest power of 2. |
| * |
| * Found on http://graphics.stanford.edu/~seander/bithacks.html. |
| */ |
| u4 dexRoundUpPower2(u4 val) |
| { |
| val--; |
| val |= val >> 1; |
| val |= val >> 2; |
| val |= val >> 4; |
| val |= val >> 8; |
| val |= val >> 16; |
| val++; |
| |
| return val; |
| } |