| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * Access the contents of a .dex file. |
| */ |
| |
| #include "DexFile.h" |
| #include "DexProto.h" |
| #include "DexCatch.h" |
| #include "Leb128.h" |
| #include "sha1.h" |
| #include "ZipArchive.h" |
| |
| #include <zlib.h> |
| |
| #include <stdlib.h> |
| #include <stddef.h> |
| #include <string.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| |
/*
 * Compile-time switches for integrity checking.
 *
 * Verifying checksums is good, but it slows things down and forces every
 * page of the file to be touched.  It also cannot work on "optimized"
 * files, because their contents are rewritten.
 */
static const bool kVerifyChecksum  = false;
static const bool kVerifySignature = false;
| |
| |
/*
 * Compare two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, so different encodings of the
 * same value compare as equal.  Only a real '\0' byte terminates a string.
 *
 * Returns zero if the strings are equal, a negative value if s1 sorts
 * before s2, and a positive value otherwise (as for strcmp()).
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    /* Walk both strings in lock step while neither has ended. */
    while (*s1 != '\0' && *s2 != '\0') {
        int lhs = dexGetUtf16FromUtf8(&s1);
        int rhs = dexGetUtf16FromUtf8(&s2);

        if (lhs != rhs) {
            return lhs - rhs;
        }
    }

    /* At least one string is exhausted; the shorter one sorts first. */
    if (*s1 == '\0') {
        return (*s2 == '\0') ? 0 : -1;
    }
    return 1;
}
| |
| /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ |
| u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { |
| 0x00000000, // 00..1f low control characters; nothing valid |
| 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' |
| 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' |
| 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' |
| }; |
| |
| /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ |
| bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { |
| /* |
| * It's a multibyte encoded character. Decode it and analyze. We |
| * accept anything that isn't (a) an improperly encoded low value, |
| * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high |
| * control character, or (e) a high space, layout, or special |
| * character (U+00a0, U+2000..U+200f, U+2028..U+202f, |
| * U+fff0..U+ffff). |
| */ |
| |
| u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); |
| |
| // Perform follow-up tests based on the high 8 bits. |
| switch (utf16 >> 8) { |
| case 0x00: { |
| // It's only valid if it's above the ISO-8859-1 high space (0xa0). |
| return (utf16 > 0x00a0); |
| } |
| case 0xd8: |
| case 0xd9: |
| case 0xda: |
| case 0xdb: { |
| /* |
| * It's a leading surrogate. Check to see that a trailing |
| * surrogate follows. |
| */ |
| utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); |
| return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); |
| } |
| case 0xdc: |
| case 0xdd: |
| case 0xde: |
| case 0xdf: { |
| // It's a trailing surrogate, which is not valid at this point. |
| return false; |
| } |
| case 0x20: |
| case 0xff: { |
| // It's in the range that has spaces, controls, and specials. |
| switch (utf16 & 0xfff8) { |
| case 0x2000: |
| case 0x2008: |
| case 0x2028: |
| case 0xfff0: |
| case 0xfff8: { |
| return false; |
| } |
| } |
| break; |
| } |
| } |
| |
| return true; |
| } |
| |
/*
 * Return whether the given string is a valid field or method name.
 *
 * The empty string is rejected.  A leading '<' is allowed only as the
 * first character, and such a name must end with '>'; a '>' anywhere else
 * makes the name invalid.  Every other character is checked with
 * dexIsValidMemberNameUtf8().
 */
bool dexIsValidMemberName(const char* s) {
    bool bracketed;

    if (*s == '\0') {
        /* The empty string is not a valid name. */
        return false;
    }

    bracketed = (*s == '<');
    if (bracketed) {
        s++;
    }

    while (*s != '\0') {
        if (*s == '>') {
            /* Must close an opening '<' and be the final character. */
            return bracketed && (s[1] == '\0');
        }
        if (!dexIsValidMemberNameUtf8(&s)) {
            return false;
        }
    }

    /* Reached the end without a '>': fine only if there was no '<'. */
    return !bracketed;
}
| |
| /* Return whether the given string is a valid type descriptor. */ |
| bool dexIsValidTypeDescriptor(const char* s) { |
| int arrayCount = 0; |
| |
| while (*s == '[') { |
| arrayCount++; |
| s++; |
| } |
| |
| if (arrayCount > 255) { |
| // Arrays may have no more than 255 dimensions. |
| return false; |
| } |
| |
| switch (*(s++)) { |
| case 'B': |
| case 'C': |
| case 'D': |
| case 'F': |
| case 'I': |
| case 'J': |
| case 'S': |
| case 'Z': { |
| // These are all single-character descriptors for primitive types. |
| return (*s == '\0'); |
| } |
| case 'V': { |
| // You can't have an array of void. |
| return (arrayCount == 0) && (*s == '\0'); |
| } |
| case 'L': { |
| // Break out and continue below. |
| break; |
| } |
| default: { |
| // Oddball descriptor character. |
| return false; |
| } |
| } |
| |
| // We just consumed the 'L' that introduces a class name. |
| |
| bool slashOrFirst = true; // first character or just encountered a slash |
| for (;;) { |
| u1 c = (u1) *s; |
| switch (c) { |
| case '\0': { |
| // Premature end. |
| return false; |
| } |
| case ';': { |
| /* |
| * Make sure that this is the end of the string and that |
| * it doesn't end with an empty component (including the |
| * degenerate case of "L;"). |
| */ |
| return (s[1] == '\0') && !slashOrFirst; |
| } |
| case '/': { |
| if (slashOrFirst) { |
| // Slash at start or two slashes in a row. |
| return false; |
| } |
| slashOrFirst = true; |
| s++; |
| break; |
| } |
| default: { |
| if (!dexIsValidMemberNameUtf8(&s)) { |
| return false; |
| } |
| slashOrFirst = false; |
| break; |
| } |
| } |
| } |
| } |
| |
/*
 * Return whether the given string is a valid reference descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it describes a class or an
 * array rather than a primitive type.
 */
bool dexIsReferenceDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s)
            && ((s[0] == 'L') || (s[0] == '['));
}
| |
/*
 * Return whether the given string is a valid class descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it describes a class rather
 * than an array or primitive type.
 */
bool dexIsClassDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] == 'L');
}
| |
/*
 * Return whether the given string is a valid field type descriptor, i.e.
 * dexIsValidTypeDescriptor() accepts it and it is anything but "void".
 */
bool dexIsFieldDescriptor(const char* s) {
    return dexIsValidTypeDescriptor(s) && (s[0] != 'V');
}
| |
| /* Return the UTF-8 encoded string with the specified string_id index, |
| * also filling in the UTF-16 size (number of 16-bit code points).*/ |
| const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, |
| u4* utf16Size) { |
| const DexStringId* pStringId = dexGetStringId(pDexFile, idx); |
| const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; |
| |
| *utf16Size = readUnsignedLeb128(&ptr); |
| return (const char*) ptr; |
| } |
| |
| /* |
| * Format an SHA-1 digest for printing. tmpBuf must be able to hold at |
| * least kSHA1DigestOutputLen bytes. |
| */ |
| const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); |
| |
| /* |
| * Compute a SHA-1 digest on a range of bytes. |
| */ |
| static void dexComputeSHA1Digest(const unsigned char* data, size_t length, |
| unsigned char digest[]) |
| { |
| SHA1_CTX context; |
| SHA1Init(&context); |
| SHA1Update(&context, data, length); |
| SHA1Final(digest, &context); |
| } |
| |
| /* |
| * Format the SHA-1 digest into the buffer, which must be able to hold at |
| * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, |
| */ |
| static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) |
| { |
| static const char hexDigit[] = "0123456789abcdef"; |
| char* cp; |
| int i; |
| |
| cp = tmpBuf; |
| for (i = 0; i < kSHA1DigestLen; i++) { |
| *cp++ = hexDigit[digest[i] >> 4]; |
| *cp++ = hexDigit[digest[i] & 0x0f]; |
| } |
| *cp++ = '\0'; |
| |
| assert(cp == tmpBuf + kSHA1DigestOutputLen); |
| |
| return tmpBuf; |
| } |
| |
| /* |
| * Compute a hash code on a UTF-8 string, for use with internal hash tables. |
| * |
| * This may or may not be compatible with UTF-8 hash functions used inside |
| * the Dalvik VM. |
| * |
| * The basic "multiply by 31 and add" approach does better on class names |
| * than most other things tried (e.g. adler32). |
| */ |
| static u4 classDescriptorHash(const char* str) |
| { |
| u4 hash = 1; |
| |
| while (*str != '\0') |
| hash = hash * 31 + *str++; |
| |
| return hash; |
| } |
| |
| /* |
| * Add an entry to the class lookup table. We hash the string and probe |
| * until we find an open slot. |
| */ |
| static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, |
| int stringOff, int classDefOff, int* pNumProbes) |
| { |
| const char* classDescriptor = |
| (const char*) (pDexFile->baseAddr + stringOff); |
| const DexClassDef* pClassDef = |
| (const DexClassDef*) (pDexFile->baseAddr + classDefOff); |
| u4 hash = classDescriptorHash(classDescriptor); |
| int mask = pLookup->numEntries-1; |
| int idx = hash & mask; |
| |
| /* |
| * Find the first empty slot. We oversized the table, so this is |
| * guaranteed to finish. |
| */ |
| int probes = 0; |
| while (pLookup->table[idx].classDescriptorOffset != 0) { |
| idx = (idx + 1) & mask; |
| probes++; |
| } |
| //if (probes > 1) |
| // LOGW("classLookupAdd: probes=%d\n", probes); |
| |
| pLookup->table[idx].classDescriptorHash = hash; |
| pLookup->table[idx].classDescriptorOffset = stringOff; |
| pLookup->table[idx].classDefOffset = classDefOff; |
| *pNumProbes = probes; |
| } |
| |
| /* |
| * Round up to the next highest power of 2. |
| * |
| * Found on http://graphics.stanford.edu/~seander/bithacks.html. |
| */ |
| u4 dexRoundUpPower2(u4 val) |
| { |
| val--; |
| val |= val >> 1; |
| val |= val >> 2; |
| val |= val >> 4; |
| val |= val >> 8; |
| val |= val >> 16; |
| val++; |
| |
| return val; |
| } |
| |
| /* |
| * Create the class lookup hash table. |
| * |
| * Returns newly-allocated storage. |
| */ |
| DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) |
| { |
| DexClassLookup* pLookup; |
| int allocSize; |
| int i, numEntries; |
| int numProbes, totalProbes, maxProbes; |
| |
| numProbes = totalProbes = maxProbes = 0; |
| |
| assert(pDexFile != NULL); |
| |
| /* |
| * Using a factor of 3 results in far less probing than a factor of 2, |
| * but almost doubles the flash storage requirements for the bootstrap |
| * DEX files. The overall impact on class loading performance seems |
| * to be minor. We could probably get some performance improvement by |
| * using a secondary hash. |
| */ |
| numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); |
| allocSize = offsetof(DexClassLookup, table) |
| + numEntries * sizeof(pLookup->table[0]); |
| |
| pLookup = (DexClassLookup*) calloc(1, allocSize); |
| if (pLookup == NULL) |
| return NULL; |
| pLookup->size = allocSize; |
| pLookup->numEntries = numEntries; |
| |
| for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { |
| const DexClassDef* pClassDef; |
| const char* pString; |
| |
| pClassDef = dexGetClassDef(pDexFile, i); |
| pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); |
| |
| classLookupAdd(pDexFile, pLookup, |
| (u1*)pString - pDexFile->baseAddr, |
| (u1*)pClassDef - pDexFile->baseAddr, &numProbes); |
| |
| if (numProbes > maxProbes) |
| maxProbes = numProbes; |
| totalProbes += numProbes; |
| } |
| |
| LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" |
| " total=%d max=%d\n", |
| pDexFile->pHeader->classDefsSize, numEntries, |
| (100 * pDexFile->pHeader->classDefsSize) / numEntries, |
| allocSize, totalProbes, maxProbes); |
| |
| return pLookup; |
| } |
| |
| |
| /* |
| * Set up the basic raw data pointers of a DexFile. This function isn't |
| * meant for general use. |
| */ |
| void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { |
| DexHeader *pHeader = (DexHeader*) data; |
| |
| pDexFile->baseAddr = data; |
| pDexFile->pHeader = pHeader; |
| pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); |
| pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); |
| pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); |
| pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); |
| pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); |
| pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); |
| pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); |
| } |
| |
| |
| /* |
| * Parse out an index map entry, advancing "*pData" and reducing "*pSize". |
| */ |
| static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding, |
| u4* pFullCount, u4* pReducedCount, const u2** pMap) |
| { |
| const u4* wordPtr = (const u4*) *pData; |
| u4 size = *pSize; |
| u4 mapCount; |
| |
| if (expanding) { |
| if (size < 4) |
| return false; |
| mapCount = *pReducedCount = *wordPtr++; |
| *pFullCount = (u4) -1; |
| size -= sizeof(u4); |
| } else { |
| if (size < 8) |
| return false; |
| mapCount = *pFullCount = *wordPtr++; |
| *pReducedCount = *wordPtr++; |
| size -= sizeof(u4) * 2; |
| } |
| |
| u4 mapSize = mapCount * sizeof(u2); |
| |
| if (size < mapSize) |
| return false; |
| *pMap = (const u2*) wordPtr; |
| size -= mapSize; |
| |
| /* advance the pointer */ |
| const u1* ptr = (const u1*) wordPtr; |
| ptr += (mapSize + 3) & ~0x3; |
| |
| /* update pass-by-reference values */ |
| *pData = (const u1*) ptr; |
| *pSize = size; |
| |
| return true; |
| } |
| |
| /* |
| * Set up some pointers into the mapped data. |
| * |
| * See analysis/ReduceConstants.c for the data layout description. |
| */ |
| static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size, |
| bool expanding) |
| { |
| if (!parseIndexMapEntry(&data, &size, expanding, |
| &pDexFile->indexMap.classFullCount, |
| &pDexFile->indexMap.classReducedCount, |
| &pDexFile->indexMap.classMap)) |
| { |
| return false; |
| } |
| |
| if (!parseIndexMapEntry(&data, &size, expanding, |
| &pDexFile->indexMap.methodFullCount, |
| &pDexFile->indexMap.methodReducedCount, |
| &pDexFile->indexMap.methodMap)) |
| { |
| return false; |
| } |
| |
| if (!parseIndexMapEntry(&data, &size, expanding, |
| &pDexFile->indexMap.fieldFullCount, |
| &pDexFile->indexMap.fieldReducedCount, |
| &pDexFile->indexMap.fieldMap)) |
| { |
| return false; |
| } |
| |
| if (!parseIndexMapEntry(&data, &size, expanding, |
| &pDexFile->indexMap.stringFullCount, |
| &pDexFile->indexMap.stringReducedCount, |
| &pDexFile->indexMap.stringMap)) |
| { |
| return false; |
| } |
| |
| if (expanding) { |
| /* |
| * The map includes the "reduced" counts; pull the original counts |
| * out of the DexFile so that code has a consistent source. |
| */ |
| assert(pDexFile->indexMap.classFullCount == (u4) -1); |
| assert(pDexFile->indexMap.methodFullCount == (u4) -1); |
| assert(pDexFile->indexMap.fieldFullCount == (u4) -1); |
| assert(pDexFile->indexMap.stringFullCount == (u4) -1); |
| |
| #if 0 // TODO: not available yet -- do later or just skip this |
| pDexFile->indexMap.classFullCount = |
| pDexFile->pHeader->typeIdsSize; |
| pDexFile->indexMap.methodFullCount = |
| pDexFile->pHeader->methodIdsSize; |
| pDexFile->indexMap.fieldFullCount = |
| pDexFile->pHeader->fieldIdsSize; |
| pDexFile->indexMap.stringFullCount = |
| pDexFile->pHeader->stringIdsSize; |
| #endif |
| } |
| |
| LOGI("Class : %u %u %u\n", |
| pDexFile->indexMap.classFullCount, |
| pDexFile->indexMap.classReducedCount, |
| pDexFile->indexMap.classMap[0]); |
| LOGI("Method: %u %u %u\n", |
| pDexFile->indexMap.methodFullCount, |
| pDexFile->indexMap.methodReducedCount, |
| pDexFile->indexMap.methodMap[0]); |
| LOGI("Field : %u %u %u\n", |
| pDexFile->indexMap.fieldFullCount, |
| pDexFile->indexMap.fieldReducedCount, |
| pDexFile->indexMap.fieldMap[0]); |
| LOGI("String: %u %u %u\n", |
| pDexFile->indexMap.stringFullCount, |
| pDexFile->indexMap.stringReducedCount, |
| pDexFile->indexMap.stringMap[0]); |
| |
| return true; |
| } |
| |
| /* |
| * Parse some auxillary data tables. |
| * |
| * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup |
| * table. Subsequent versions switched to the "chunk" format. |
| */ |
| static bool parseAuxData(const u1* data, DexFile* pDexFile) |
| { |
| const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset); |
| u4 indexMapType = 0; |
| |
| /* v1.0 format? */ |
| if (*pAux == 0) { |
| LOGV("+++ found OLD dex format\n"); |
| pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1); |
| return true; |
| } |
| LOGV("+++ found NEW dex format\n"); |
| |
| /* process chunks until we see the end marker */ |
| while (*pAux != kDexChunkEnd) { |
| u4 size = *(pAux+1); |
| u1* data = (u1*) (pAux + 2); |
| |
| switch (*pAux) { |
| case kDexChunkClassLookup: |
| pDexFile->pClassLookup = (const DexClassLookup*) data; |
| break; |
| case kDexChunkReducingIndexMap: |
| LOGI("+++ found reducing index map, size=%u\n", size); |
| if (!parseIndexMap(pDexFile, data, size, false)) { |
| LOGE("Failed parsing reducing index map\n"); |
| return false; |
| } |
| indexMapType = *pAux; |
| break; |
| case kDexChunkExpandingIndexMap: |
| LOGI("+++ found expanding index map, size=%u\n", size); |
| if (!parseIndexMap(pDexFile, data, size, true)) { |
| LOGE("Failed parsing expanding index map\n"); |
| return false; |
| } |
| indexMapType = *pAux; |
| break; |
| case kDexChunkRegisterMaps: |
| LOGV("+++ found register maps, size=%u\n", size); |
| pDexFile->pRegisterMapPool = data; |
| break; |
| default: |
| LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n", |
| *pAux, |
| (char) ((*pAux) >> 24), (char) ((*pAux) >> 16), |
| (char) ((*pAux) >> 8), (char) (*pAux), |
| size); |
| break; |
| } |
| |
| /* |
| * Advance pointer, padding to 64-bit boundary. The extra "+8" is |
| * for the type/size header. |
| */ |
| size = (size + 8 + 7) & ~7; |
| pAux += size / sizeof(u4); |
| } |
| |
| #if 0 // TODO: propagate expected map type from the VM through the API |
| /* |
| * If we're configured to expect an index map, and we don't find one, |
| * reject this DEX so we'll regenerate it. Also, if we found an |
| * "expanding" map but we're not configured to use it, we have to fail |
| * because the constants aren't usable without translation. |
| */ |
| if (indexMapType != expectedIndexMapType) { |
| LOGW("Incompatible index map configuration: found 0x%04x, need %d\n", |
| indexMapType, DVM_REDUCE_CONSTANTS); |
| return false; |
| } |
| #endif |
| |
| return true; |
| } |
| |
| /* |
| * Parse an optimized or unoptimized .dex file sitting in memory. This is |
| * called after the byte-ordering and structure alignment has been fixed up. |
| * |
| * On success, return a newly-allocated DexFile. |
| */ |
| DexFile* dexFileParse(const u1* data, size_t length, int flags) |
| { |
| DexFile* pDexFile = NULL; |
| const DexHeader* pHeader; |
| const u1* magic; |
| int result = -1; |
| |
| if (length < sizeof(DexHeader)) { |
| LOGE("too short to be a valid .dex\n"); |
| goto bail; /* bad file format */ |
| } |
| |
| pDexFile = (DexFile*) malloc(sizeof(DexFile)); |
| if (pDexFile == NULL) |
| goto bail; /* alloc failure */ |
| memset(pDexFile, 0, sizeof(DexFile)); |
| |
| /* |
| * Peel off the optimized header. |
| */ |
| if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { |
| magic = data; |
| if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { |
| LOGE("bad opt version (0x%02x %02x %02x %02x)\n", |
| magic[4], magic[5], magic[6], magic[7]); |
| goto bail; |
| } |
| |
| pDexFile->pOptHeader = (const DexOptHeader*) data; |
| LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", |
| pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); |
| |
| /* locate some auxillary data tables */ |
| if (!parseAuxData(data, pDexFile)) |
| goto bail; |
| |
| /* ignore the opt header and appended data from here on out */ |
| data += pDexFile->pOptHeader->dexOffset; |
| length -= pDexFile->pOptHeader->dexOffset; |
| if (pDexFile->pOptHeader->dexLength > length) { |
| LOGE("File truncated? stored len=%d, rem len=%d\n", |
| pDexFile->pOptHeader->dexLength, (int) length); |
| goto bail; |
| } |
| length = pDexFile->pOptHeader->dexLength; |
| } |
| |
| dexFileSetupBasicPointers(pDexFile, data); |
| pHeader = pDexFile->pHeader; |
| |
| magic = pHeader->magic; |
| if (memcmp(magic, DEX_MAGIC, 4) != 0) { |
| /* not expected */ |
| LOGE("bad magic number (0x%02x %02x %02x %02x)\n", |
| magic[0], magic[1], magic[2], magic[3]); |
| goto bail; |
| } |
| if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { |
| LOGE("bad dex version (0x%02x %02x %02x %02x)\n", |
| magic[4], magic[5], magic[6], magic[7]); |
| goto bail; |
| } |
| |
| /* |
| * Verify the checksum. This is reasonably quick, but does require |
| * touching every byte in the DEX file. The checksum changes after |
| * byte-swapping and DEX optimization. |
| */ |
| if (flags & kDexParseVerifyChecksum) { |
| u4 adler = dexComputeChecksum(pHeader); |
| if (adler != pHeader->checksum) { |
| LOGE("ERROR: bad checksum (%08x vs %08x)\n", |
| adler, pHeader->checksum); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } else { |
| LOGV("+++ adler32 checksum (%08x) verified\n", adler); |
| } |
| } |
| |
| /* |
| * Verify the SHA-1 digest. (Normally we don't want to do this -- |
| * the digest is used to uniquely identify a DEX file, and can't be |
| * computed post-optimization.) |
| * |
| * The digest will be invalid after byte swapping and DEX optimization. |
| */ |
| if (kVerifySignature) { |
| unsigned char sha1Digest[kSHA1DigestLen]; |
| const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + |
| kSHA1DigestLen; |
| |
| dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); |
| if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { |
| char tmpBuf1[kSHA1DigestOutputLen]; |
| char tmpBuf2[kSHA1DigestOutputLen]; |
| LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", |
| dexSHA1DigestToStr(sha1Digest, tmpBuf1), |
| dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } else { |
| LOGV("+++ sha1 digest verified\n"); |
| } |
| } |
| |
| if (pHeader->fileSize != length) { |
| LOGE("ERROR: stored file size (%d) != expected (%d)\n", |
| (int) pHeader->fileSize, (int) length); |
| if (!(flags & kDexParseContinueOnError)) |
| goto bail; |
| } |
| |
| if (pHeader->classDefsSize == 0) { |
| LOGE("ERROR: DEX file has no classes in it, failing\n"); |
| goto bail; |
| } |
| |
| /* |
| * Success! |
| */ |
| result = 0; |
| |
| bail: |
| if (result != 0 && pDexFile != NULL) { |
| dexFileFree(pDexFile); |
| pDexFile = NULL; |
| } |
| return pDexFile; |
| } |
| |
| /* |
| * Free up the DexFile and any associated data structures. |
| * |
| * Note we may be called with a partially-initialized DexFile. |
| */ |
| void dexFileFree(DexFile* pDexFile) |
| { |
| if (pDexFile == NULL) |
| return; |
| |
| free(pDexFile); |
| } |
| |
| /* |
| * Look up a class definition entry by descriptor. |
| * |
| * "descriptor" should look like "Landroid/debug/Stuff;". |
| */ |
| const DexClassDef* dexFindClass(const DexFile* pDexFile, |
| const char* descriptor) |
| { |
| const DexClassLookup* pLookup = pDexFile->pClassLookup; |
| u4 hash; |
| int idx, mask; |
| |
| hash = classDescriptorHash(descriptor); |
| mask = pLookup->numEntries - 1; |
| idx = hash & mask; |
| |
| /* |
| * Search until we find a matching entry or an empty slot. |
| */ |
| while (true) { |
| int offset; |
| |
| offset = pLookup->table[idx].classDescriptorOffset; |
| if (offset == 0) |
| return NULL; |
| |
| if (pLookup->table[idx].classDescriptorHash == hash) { |
| const char* str; |
| |
| str = (const char*) (pDexFile->baseAddr + offset); |
| if (strcmp(str, descriptor) == 0) { |
| return (const DexClassDef*) |
| (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); |
| } |
| } |
| |
| idx = (idx + 1) & mask; |
| } |
| } |
| |
| |
| /* |
| * Compute the DEX file checksum for a memory-mapped DEX file. |
| */ |
| u4 dexComputeChecksum(const DexHeader* pHeader) |
| { |
| const u1* start = (const u1*) pHeader; |
| |
| uLong adler = adler32(0L, Z_NULL, 0); |
| const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); |
| |
| return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); |
| } |
| |
| |
| /* |
| * Compute the size, in bytes, of a DexCode. |
| */ |
| size_t dexGetDexCodeSize(const DexCode* pCode) |
| { |
| /* |
| * The catch handler data is the last entry. It has a variable number |
| * of variable-size pieces, so we need to create an iterator. |
| */ |
| u4 handlersSize; |
| u4 offset; |
| u4 ui; |
| |
| if (pCode->triesSize != 0) { |
| handlersSize = dexGetHandlersSize(pCode); |
| offset = dexGetFirstHandlerOffset(pCode); |
| } else { |
| handlersSize = 0; |
| offset = 0; |
| } |
| |
| for (ui = 0; ui < handlersSize; ui++) { |
| DexCatchIterator iterator; |
| dexCatchIteratorInit(&iterator, pCode, offset); |
| offset = dexCatchIteratorGetEndOffset(&iterator, pCode); |
| } |
| |
| const u1* handlerData = dexGetCatchHandlerData(pCode); |
| |
| //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", |
| // pCode, handlerData, offset); |
| |
| /* return the size of the catch handler + everything before it */ |
| return (handlerData - (u1*) pCode) + offset; |
| } |
| |
| |
| /* |
| * =========================================================================== |
| * Debug info |
| * =========================================================================== |
| */ |
| |
/*
 * Skip over a class type in a signature.  "sig" points at the leading
 * 'L'.  Returns a pointer to the character after the closing ';', or to
 * the string terminator if the ';' is missing.
 */
static const char* skipClassType(const char* sig)
{
    do {
        sig++;
    } while (*sig != ';' && *sig != '\0');

    return (*sig == ';') ? sig + 1 : sig;
}

/*
 * Decode the arguments in a method signature, which looks something
 * like "(ID[Ljava/lang/String;)V".
 *
 * Returns the type signature letter for the next argument ('L' for any
 * object reference, '[' for any array), or ')' if there are no more args.
 * Advances "*pSig" to point to the character after the argument returned.
 * A leading '(' is skipped.
 *
 * At the end of the string, '\0' is returned and "*pSig" is left alone.
 * On a truncated signature (a class name without its ';', or array
 * brackets without an element type) "*pSig" is left pointing at the
 * terminator, never past it.
 */
static char decodeSignature(const char** pSig)
{
    const char* sig = *pSig;

    if (*sig == '(')
        sig++;

    switch (*sig) {
    case 'L':
        /* object ref */
        *pSig = skipClassType(sig);
        return 'L';

    case '[':
        /* array; advance past all dimensions and the element type */
        while (*++sig == '[')
            ;
        if (*sig == 'L') {
            *pSig = skipClassType(sig);
        } else if (*sig == '\0') {
            *pSig = sig;        /* truncated; stay on the terminator */
        } else {
            *pSig = sig + 1;
        }
        return '[';

    case '\0':
        return '\0';            /* don't advance further */

    default:
        *pSig = sig + 1;
        return *sig;
    }
}
| |
/*
 * Returns the length of a type string, given the start of the type
 * string.  Used for the case where the debug info format references
 * types that are inside a method type signature.
 *
 * Assumes any leading '(' has already been gobbled.
 */
static int typeLength (const char *type) {
    const char *cursor = type;

    /* only the advanced position matters; the type letter is ignored */
    (void) decodeSignature(&cursor);

    return cursor - type;
}
| |
| /* |
| * Reads a string index as encoded for the debug info format, |
| * returning a string pointer or NULL as appropriate. |
| */ |
| static const char* readStringIdx(const DexFile* pDexFile, |
| const u1** pStream) { |
| u4 stringIdx = readUnsignedLeb128(pStream); |
| |
| // Remember, encoded string indicies have 1 added to them. |
| if (stringIdx == 0) { |
| return NULL; |
| } else { |
| return dexStringById(pDexFile, stringIdx - 1); |
| } |
| } |
| |
| /* |
| * Reads a type index as encoded for the debug info format, returning |
| * a string pointer for its descriptor or NULL as appropriate. |
| */ |
| static const char* readTypeIdx(const DexFile* pDexFile, |
| const u1** pStream) { |
| u4 typeIdx = readUnsignedLeb128(pStream); |
| |
| // Remember, encoded type indicies have 1 added to them. |
| if (typeIdx == 0) { |
| return NULL; |
| } else { |
| return dexStringByTypeIdx(pDexFile, typeIdx - 1); |
| } |
| } |
| |
| /* access_flag value indicating that a method is static */ |
| #define ACC_STATIC 0x0008 |
| |
| typedef struct LocalInfo { |
| const char *name; |
| const char *descriptor; |
| const char *signature; |
| u2 startAddress; |
| bool live; |
| } LocalInfo; |
| |
| static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, |
| LocalInfo *localInReg, DexDebugNewLocalCb localCb) |
| { |
| if (localCb != NULL && localInReg[reg].live) { |
| localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, |
| localInReg[reg].name, |
| localInReg[reg].descriptor, |
| localInReg[reg].signature == NULL |
| ? "" : localInReg[reg].signature ); |
| } |
| } |
| |
| // TODO optimize localCb == NULL case |
| void dexDecodeDebugInfo( |
| const DexFile* pDexFile, |
| const DexCode* pCode, |
| const char* classDescriptor, |
| u4 protoIdx, |
| u4 accessFlags, |
| DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, |
| void* cnxt) |
| { |
| const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); |
| u4 line; |
| u4 parametersSize; |
| u4 address = 0; |
| LocalInfo localInReg[pCode->registersSize]; |
| u4 insnsSize = pCode->insnsSize; |
| DexProto proto = { pDexFile, protoIdx }; |
| |
| memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); |
| |
| if (stream == NULL) { |
| goto end; |
| } |
| |
| line = readUnsignedLeb128(&stream); |
| parametersSize = readUnsignedLeb128(&stream); |
| |
| u2 argReg = pCode->registersSize - pCode->insSize; |
| |
| if ((accessFlags & ACC_STATIC) == 0) { |
| /* |
| * The code is an instance method, which means that there is |
| * an initial this parameter. Also, the proto list should |
| * contain exactly one fewer argument word than the insSize |
| * indicates. |
| */ |
| assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); |
| localInReg[argReg].name = "this"; |
| localInReg[argReg].descriptor = classDescriptor; |
| localInReg[argReg].startAddress = 0; |
| localInReg[argReg].live = true; |
| argReg++; |
| } else { |
| assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); |
| } |
| |
| DexParameterIterator iterator; |
| dexParameterIteratorInit(&iterator, &proto); |
| |
| while (parametersSize-- != 0) { |
| const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); |
| const char *name; |
| int reg; |
| |
| if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { |
| goto invalid_stream; |
| } |
| |
| name = readStringIdx(pDexFile, &stream); |
| reg = argReg; |
| |
| switch (descriptor[0]) { |
| case 'D': |
| case 'J': |
| argReg += 2; |
| break; |
| default: |
| argReg += 1; |
| break; |
| } |
| |
| if (name != NULL) { |
| localInReg[reg].name = name; |
| localInReg[reg].descriptor = descriptor; |
| localInReg[reg].signature = NULL; |
| localInReg[reg].startAddress = address; |
| localInReg[reg].live = true; |
| } |
| } |
| |
| for (;;) { |
| u1 opcode = *stream++; |
| u2 reg; |
| |
| switch (opcode) { |
| case DBG_END_SEQUENCE: |
| goto end; |
| |
| case DBG_ADVANCE_PC: |
| address += readUnsignedLeb128(&stream); |
| break; |
| |
| case DBG_ADVANCE_LINE: |
| line += readSignedLeb128(&stream); |
| break; |
| |
| case DBG_START_LOCAL: |
| case DBG_START_LOCAL_EXTENDED: |
| reg = readUnsignedLeb128(&stream); |
| if (reg > pCode->registersSize) goto invalid_stream; |
| |
| // Emit what was previously there, if anything |
| emitLocalCbIfLive (cnxt, reg, address, |
| localInReg, localCb); |
| |
| localInReg[reg].name = readStringIdx(pDexFile, &stream); |
| localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); |
| if (opcode == DBG_START_LOCAL_EXTENDED) { |
| localInReg[reg].signature |
| = readStringIdx(pDexFile, &stream); |
| } else { |
| localInReg[reg].signature = NULL; |
| } |
| localInReg[reg].startAddress = address; |
| localInReg[reg].live = true; |
| break; |
| |
| case DBG_END_LOCAL: |
| reg = readUnsignedLeb128(&stream); |
| if (reg > pCode->registersSize) goto invalid_stream; |
| |
| emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); |
| localInReg[reg].live = false; |
| break; |
| |
| case DBG_RESTART_LOCAL: |
| reg = readUnsignedLeb128(&stream); |
| if (reg > pCode->registersSize) goto invalid_stream; |
| |
| if (localInReg[reg].name == NULL |
| || localInReg[reg].descriptor == NULL) { |
| goto invalid_stream; |
| } |
| |
| /* |
| * If the register is live, the "restart" is superfluous, |
| * and we don't want to mess with the existing start address. |
| */ |
| if (!localInReg[reg].live) { |
| localInReg[reg].startAddress = address; |
| localInReg[reg].live = true; |
| } |
| break; |
| |
| case DBG_SET_PROLOGUE_END: |
| case DBG_SET_EPILOGUE_BEGIN: |
| case DBG_SET_FILE: |
| break; |
| |
| default: { |
| int adjopcode = opcode - DBG_FIRST_SPECIAL; |
| |
| address += adjopcode / DBG_LINE_RANGE; |
| line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); |
| |
| if (posCb != NULL) { |
| int done; |
| done = posCb(cnxt, address, line); |
| |
| if (done) { |
| // early exit |
| goto end; |
| } |
| } |
| break; |
| } |
| } |
| } |
| |
| end: |
| { |
| int reg; |
| for (reg = 0; reg < pCode->registersSize; reg++) { |
| emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); |
| } |
| } |
| return; |
| |
| invalid_stream: |
| IF_LOGE() { |
| char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); |
| LOGE("Invalid debug info stream. class %s; proto %s", |
| classDescriptor, methodDescriptor); |
| free(methodDescriptor); |
| } |
| } |
| |