Blame - libdex/DexFile.c - fp2-dev/platform/dalvik

blob: 99b38c9161ffe7a8075cfa7da5e576d9725e5042 [file] [log] [blame]

The Android Open Source Project	f6c3871	2009-03-03 19:28:47 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2008 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
Andy McFadden	b51ea11	2009-05-08 16:50:17 -0700	[diff] [blame^]	16
The Android Open Source Project	f6c3871	2009-03-03 19:28:47 -0800	[diff] [blame]	17	/*
				18	* Access the contents of a .dex file.
				19	*/
				20
				21	#include "DexFile.h"
				22	#include "DexProto.h"
Andy McFadden	b51ea11	2009-05-08 16:50:17 -0700	[diff] [blame^]	23	#include "DexCatch.h"
The Android Open Source Project	f6c3871	2009-03-03 19:28:47 -0800	[diff] [blame]	24	#include "Leb128.h"
				25	#include "sha1.h"
				26	#include "ZipArchive.h"
				27
				28	#include <zlib.h>
				29
				30	#include <stdlib.h>
				31	#include <stddef.h>
				32	#include <string.h>
				33	#include <fcntl.h>
				34	#include <errno.h>
				35
				36	/*
				37	* Verifying checksums is good, but it slows things down and causes us to
				38	* touch every page. In the "optimized" world, it doesn't work at all,
				39	* because we rewrite the contents.
				*
				* kVerifySignature gates the SHA-1 digest comparison in dexFileParse().
				* kVerifyChecksum is not referenced in the code visible here; the
				* Adler-32 check in dexFileParse() is driven by the caller-supplied
				* kDexParseVerifyChecksum flag instead.
				40	*/
				41	static const bool kVerifyChecksum = false;
				42	static const bool kVerifySignature = false;
				43
				44
				/*
				 * Order two '\0'-terminated modified UTF-8 strings by the values that
				 * dexGetUtf16FromUtf8() decodes from them, one unit at a time. Different
				 * encodings of the same code point therefore compare as equal, and only
				 * a literal '\0' byte ends a string.
				 *
				 * Returns zero when the strings are equal, a negative value when s1
				 * sorts first and a positive value when s2 sorts first, as for strcmp().
				 */
				int dexUtf8Cmp(const char* s1, const char* s2) {
				    /* Walk both strings in lock step while neither has ended. */
				    while (*s1 != '\0' && *s2 != '\0') {
				        int left = dexGetUtf16FromUtf8(&s1);
				        int right = dexGetUtf16FromUtf8(&s2);

				        if (left != right) {
				            return left - right;
				        }
				    }

				    if (*s1 != '\0') {
				        /* s2 ran out first, so s1 is the longer string. */
				        return 1;
				    }

				    /* s1 has ended: equal if s2 ended too, otherwise s1 is a prefix. */
				    return (*s2 == '\0') ? 0 : -1;
				}
				70
				71	/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
				72	u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
				73	0x00000000, // 00..1f low control characters; nothing valid
				74	0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
				75	0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
				76	0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z'
				77	};
				78
				79	/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
				80	bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
				81	/*
				82	* It's a multibyte encoded character. Decode it and analyze. We
				83	* accept anything that isn't (a) an improperly encoded low value,
				84	* (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
				85	* control character, or (e) a high space, layout, or special
				86	* character (U+00a0, U+2000..U+200f, U+2028..U+202f,
				87	* U+fff0..U+ffff).
				88	*/
				89
				90	u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
				91
				92	// Perform follow-up tests based on the high 8 bits.
				93	switch (utf16 >> 8) {
				94	case 0x00: {
				95	// It's only valid if it's above the ISO-8859-1 high space (0xa0).
				96	return (utf16 > 0x00a0);
				97	}
				98	case 0xd8:
				99	case 0xd9:
				100	case 0xda:
				101	case 0xdb: {
				102	/*
				103	* It's a leading surrogate. Check to see that a trailing
				104	* surrogate follows.
				105	*/
				106	utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
				107	return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
				108	}
				109	case 0xdc:
				110	case 0xdd:
				111	case 0xde:
				112	case 0xdf: {
				113	// It's a trailing surrogate, which is not valid at this point.
				114	return false;
				115	}
				116	case 0x20:
				117	case 0xff: {
				118	// It's in the range that has spaces, controls, and specials.
				119	switch (utf16 & 0xfff8) {
				120	case 0x2000:
				121	case 0x2008:
				122	case 0x2028:
				123	case 0xfff0:
				124	case 0xfff8: {
				125	return false;
				126	}
				127	}
				128	break;
				129	}
				130	}
				131
				132	return true;
				133	}
				134
				/*
				 * Return whether the given string is a valid field or method name.
				 *
				 * The empty string is rejected. A name may start with '<', in which
				 * case it must end with '>' and nothing may follow that '>'; '>' is not
				 * accepted in any other name. Every other character has to pass
				 * dexIsValidMemberNameUtf8().
				 */
				bool dexIsValidMemberName(const char* s) {
				    if (*s == '\0') {
				        return false;
				    }

				    /* '<' is only meaningful as the very first character. */
				    bool bracketed = (*s == '<');
				    if (bracketed) {
				        s++;
				    }

				    /* Consume name characters up to the terminator or a '>'. */
				    while (*s != '\0' && *s != '>') {
				        if (!dexIsValidMemberNameUtf8(&s)) {
				            return false;
				        }
				    }

				    if (*s == '\0') {
				        /* Ran off the end: fine unless a closing '>' was owed. */
				        return !bracketed;
				    }

				    /* Stopped on '>': it must close a '<' and be the last character. */
				    return bracketed && s[1] == '\0';
				}
				169
				170	/* Return whether the given string is a valid type descriptor. */
				171	bool dexIsValidTypeDescriptor(const char* s) {
				172	int arrayCount = 0;
				173
				174	while (*s == '[') {
				175	arrayCount++;
				176	s++;
				177	}
				178
				179	if (arrayCount > 255) {
				180	// Arrays may have no more than 255 dimensions.
				181	return false;
				182	}
				183
				184	switch (*(s++)) {
				185	case 'B':
				186	case 'C':
				187	case 'D':
				188	case 'F':
				189	case 'I':
				190	case 'J':
				191	case 'S':
				192	case 'Z': {
				193	// These are all single-character descriptors for primitive types.
				194	return (*s == '\0');
				195	}
				196	case 'V': {
				197	// You can't have an array of void.
				198	return (arrayCount == 0) && (*s == '\0');
				199	}
				200	case 'L': {
				201	// Break out and continue below.
				202	break;
				203	}
				204	default: {
				205	// Oddball descriptor character.
				206	return false;
				207	}
				208	}
				209
				210	// We just consumed the 'L' that introduces a class name.
				211
				212	bool slashOrFirst = true; // first character or just encountered a slash
				213	for (;;) {
				214	u1 c = (u1) *s;
				215	switch (c) {
				216	case '\0': {
				217	// Premature end.
				218	return false;
				219	}
				220	case ';': {
				221	/*
				222	* Make sure that this is the end of the string and that
				223	* it doesn't end with an empty component (including the
				224	* degenerate case of "L;").
				225	*/
				226	return (s[1] == '\0') && !slashOrFirst;
				227	}
				228	case '/': {
				229	if (slashOrFirst) {
				230	// Slash at start or two slashes in a row.
				231	return false;
				232	}
				233	slashOrFirst = true;
				234	s++;
				235	break;
				236	}
				237	default: {
				238	if (!dexIsValidMemberNameUtf8(&s)) {
				239	return false;
				240	}
				241	slashOrFirst = false;
				242	break;
				243	}
				244	}
				245	}
				246	}
				247
				/*
				 * Return whether the given string is a valid reference descriptor:
				 * dexIsValidTypeDescriptor() accepts it and it names a class or an
				 * array rather than a primitive type.
				 */
				bool dexIsReferenceDescriptor(const char* s) {
				    return dexIsValidTypeDescriptor(s) && (s[0] == 'L' || s[0] == '[');
				}
				258
				/*
				 * Return whether the given string is a valid class descriptor:
				 * dexIsValidTypeDescriptor() accepts it and it names a class, not an
				 * array or a primitive type.
				 */
				bool dexIsClassDescriptor(const char* s) {
				    return dexIsValidTypeDescriptor(s) && s[0] == 'L';
				}
				269
				/*
				 * Return whether the given string is a valid field type descriptor:
				 * dexIsValidTypeDescriptor() accepts it and it is anything but "void".
				 */
				bool dexIsFieldDescriptor(const char* s) {
				    return dexIsValidTypeDescriptor(s) && s[0] != 'V';
				}
				280
				281	/* Return the UTF-8 encoded string with the specified string_id index,
				282	* also filling in the UTF-16 size (number of 16-bit code points).*/
				283	const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
				284	u4* utf16Size) {
				285	const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
				286	const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
				287
				288	*utf16Size = readUnsignedLeb128(&ptr);
				289	return (const char*) ptr;
				290	}
				291
				292	/*
				293	* Format an SHA-1 digest for printing. tmpBuf must be able to hold at
				294	* least kSHA1DigestOutputLen bytes.
				*
				* Declaration only: no definition of dvmSHA1DigestToStr() appears in the
				* portion of this file visible here, which formats digests with the
				* static dexSHA1DigestToStr() instead.
				295	*/
				296	const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
				297
				298	/*
				299	* Compute a SHA-1 digest on a range of bytes.
				300	*/
				301	static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
				302	unsigned char digest[])
				303	{
				304	SHA1_CTX context;
				305	SHA1Init(&context);
				306	SHA1Update(&context, data, length);
				307	SHA1Final(digest, &context);
				308	}
				309
				310	/*
				311	* Format the SHA-1 digest into the buffer, which must be able to hold at
				312	* least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer,
				313	*/
				314	static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
				315	{
				316	static const char hexDigit[] = "0123456789abcdef";
				317	char* cp;
				318	int i;
				319
				320	cp = tmpBuf;
				321	for (i = 0; i < kSHA1DigestLen; i++) {
				322	*cp++ = hexDigit[digest[i] >> 4];
				323	*cp++ = hexDigit[digest[i] & 0x0f];
				324	}
				325	*cp++ = '\0';
				326
				327	assert(cp == tmpBuf + kSHA1DigestOutputLen);
				328
				329	return tmpBuf;
				330	}
				331
				332	/*
				333	* Compute a hash code on a UTF-8 string, for use with internal hash tables.
				334	*
				335	* This may or may not be compatible with UTF-8 hash functions used inside
				336	* the Dalvik VM.
				337	*
				338	* The basic "multiply by 31 and add" approach does better on class names
				339	* than most other things tried (e.g. adler32).
				340	*/
				341	static u4 classDescriptorHash(const char* str)
				342	{
				343	u4 hash = 1;
				344
				345	while (*str != '\0')
				346	hash = hash * 31 + *str++;
				347
				348	return hash;
				349	}
				350
				351	/*
				352	* Add an entry to the class lookup table. We hash the string and probe
				353	* until we find an open slot.
				354	*/
				355	static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
				356	int stringOff, int classDefOff, int* pNumProbes)
				357	{
				358	const char* classDescriptor =
				359	(const char*) (pDexFile->baseAddr + stringOff);
				360	const DexClassDef* pClassDef =
				361	(const DexClassDef*) (pDexFile->baseAddr + classDefOff);
				362	u4 hash = classDescriptorHash(classDescriptor);
				363	int mask = pLookup->numEntries-1;
				364	int idx = hash & mask;
				365
				366	/*
				367	* Find the first empty slot. We oversized the table, so this is
				368	* guaranteed to finish.
				369	*/
				370	int probes = 0;
				371	while (pLookup->table[idx].classDescriptorOffset != 0) {
				372	idx = (idx + 1) & mask;
				373	probes++;
				374	}
				375	//if (probes > 1)
				376	// LOGW("classLookupAdd: probes=%d\n", probes);
				377
				378	pLookup->table[idx].classDescriptorHash = hash;
				379	pLookup->table[idx].classDescriptorOffset = stringOff;
				380	pLookup->table[idx].classDefOffset = classDefOff;
				381	*pNumProbes = probes;
				382	}
				383
				384	/*
				385	* Round up to the next highest power of 2.
				386	*
				387	* Found on http://graphics.stanford.edu/~seander/bithacks.html.
				388	*/
				389	u4 dexRoundUpPower2(u4 val)
				390	{
				391	val--;
				392	val \|= val >> 1;
				393	val \|= val >> 2;
				394	val \|= val >> 4;
				395	val \|= val >> 8;
				396	val \|= val >> 16;
				397	val++;
				398
				399	return val;
				400	}
				401
				402	/*
				403	* Create the class lookup hash table.
				404	*
				405	* Returns newly-allocated storage.
				406	*/
				407	DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
				408	{
				409	DexClassLookup* pLookup;
				410	int allocSize;
				411	int i, numEntries;
				412	int numProbes, totalProbes, maxProbes;
				413
				414	numProbes = totalProbes = maxProbes = 0;
				415
				416	assert(pDexFile != NULL);
				417
				418	/*
				419	* Using a factor of 3 results in far less probing than a factor of 2,
				420	* but almost doubles the flash storage requirements for the bootstrap
				421	* DEX files. The overall impact on class loading performance seems
				422	* to be minor. We could probably get some performance improvement by
				423	* using a secondary hash.
				424	*/
				425	numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
				426	allocSize = offsetof(DexClassLookup, table)
				427	+ numEntries * sizeof(pLookup->table[0]);
				428
				429	pLookup = (DexClassLookup*) calloc(1, allocSize);
				430	if (pLookup == NULL)
				431	return NULL;
				432	pLookup->size = allocSize;
				433	pLookup->numEntries = numEntries;
				434
				435	for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
				436	const DexClassDef* pClassDef;
				437	const char* pString;
				438
				439	pClassDef = dexGetClassDef(pDexFile, i);
				440	pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
				441
				442	classLookupAdd(pDexFile, pLookup,
				443	(u1*)pString - pDexFile->baseAddr,
				444	(u1*)pClassDef - pDexFile->baseAddr, &numProbes);
				445
				446	if (numProbes > maxProbes)
				447	maxProbes = numProbes;
				448	totalProbes += numProbes;
				449	}
				450
				451	LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
				452	" total=%d max=%d\n",
				453	pDexFile->pHeader->classDefsSize, numEntries,
				454	(100 * pDexFile->pHeader->classDefsSize) / numEntries,
				455	allocSize, totalProbes, maxProbes);
				456
				457	return pLookup;
				458	}
				459
				460
				461	/*
				462	* Set up the basic raw data pointers of a DexFile. This function isn't
				463	* meant for general use.
				464	*/
				465	void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
				466	DexHeader pHeader = (DexHeader) data;
				467
				468	pDexFile->baseAddr = data;
				469	pDexFile->pHeader = pHeader;
				470	pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
				471	pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
				472	pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
				473	pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
				474	pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
				475	pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
				476	pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
				477	}
				478
				479
				480	/*
				481	* Parse out an index map entry, advancing "pData" and reducing "pSize".
				482	*/
				483	static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
				484	u4* pFullCount, u4* pReducedCount, const u2** pMap)
				485	{
				486	const u4* wordPtr = (const u4) pData;
				487	u4 size = *pSize;
				488	u4 mapCount;
				489
				490	if (expanding) {
				491	if (size < 4)
				492	return false;
				493	mapCount = pReducedCount = wordPtr++;
				494	*pFullCount = (u4) -1;
				495	size -= sizeof(u4);
				496	} else {
				497	if (size < 8)
				498	return false;
				499	mapCount = pFullCount = wordPtr++;
				500	pReducedCount = wordPtr++;
				501	size -= sizeof(u4) * 2;
				502	}
				503
				504	u4 mapSize = mapCount * sizeof(u2);
				505
				506	if (size < mapSize)
				507	return false;
				508	pMap = (const u2) wordPtr;
				509	size -= mapSize;
				510
				511	/* advance the pointer */
				512	const u1* ptr = (const u1*) wordPtr;
				513	ptr += (mapSize + 3) & ~0x3;
				514
				515	/* update pass-by-reference values */
				516	pData = (const u1) ptr;
				517	*pSize = size;
				518
				519	return true;
				520	}
				521
				522	/*
				523	* Set up some pointers into the mapped data.
				524	*
				525	* See analysis/ReduceConstants.c for the data layout description.
				526	*/
				527	static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
				528	bool expanding)
				529	{
				530	if (!parseIndexMapEntry(&data, &size, expanding,
				531	&pDexFile->indexMap.classFullCount,
				532	&pDexFile->indexMap.classReducedCount,
				533	&pDexFile->indexMap.classMap))
				534	{
				535	return false;
				536	}
				537
				538	if (!parseIndexMapEntry(&data, &size, expanding,
				539	&pDexFile->indexMap.methodFullCount,
				540	&pDexFile->indexMap.methodReducedCount,
				541	&pDexFile->indexMap.methodMap))
				542	{
				543	return false;
				544	}
				545
				546	if (!parseIndexMapEntry(&data, &size, expanding,
				547	&pDexFile->indexMap.fieldFullCount,
				548	&pDexFile->indexMap.fieldReducedCount,
				549	&pDexFile->indexMap.fieldMap))
				550	{
				551	return false;
				552	}
				553
				554	if (!parseIndexMapEntry(&data, &size, expanding,
				555	&pDexFile->indexMap.stringFullCount,
				556	&pDexFile->indexMap.stringReducedCount,
				557	&pDexFile->indexMap.stringMap))
				558	{
				559	return false;
				560	}
				561
				562	if (expanding) {
				563	/*
				564	* The map includes the "reduced" counts; pull the original counts
				565	* out of the DexFile so that code has a consistent source.
				566	*/
				567	assert(pDexFile->indexMap.classFullCount == (u4) -1);
				568	assert(pDexFile->indexMap.methodFullCount == (u4) -1);
				569	assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
				570	assert(pDexFile->indexMap.stringFullCount == (u4) -1);
				571
				572	#if 0 // TODO: not available yet -- do later or just skip this
				573	pDexFile->indexMap.classFullCount =
				574	pDexFile->pHeader->typeIdsSize;
				575	pDexFile->indexMap.methodFullCount =
				576	pDexFile->pHeader->methodIdsSize;
				577	pDexFile->indexMap.fieldFullCount =
				578	pDexFile->pHeader->fieldIdsSize;
				579	pDexFile->indexMap.stringFullCount =
				580	pDexFile->pHeader->stringIdsSize;
				581	#endif
				582	}
				583
				584	LOGI("Class : %u %u %u\n",
				585	pDexFile->indexMap.classFullCount,
				586	pDexFile->indexMap.classReducedCount,
				587	pDexFile->indexMap.classMap[0]);
				588	LOGI("Method: %u %u %u\n",
				589	pDexFile->indexMap.methodFullCount,
				590	pDexFile->indexMap.methodReducedCount,
				591	pDexFile->indexMap.methodMap[0]);
				592	LOGI("Field : %u %u %u\n",
				593	pDexFile->indexMap.fieldFullCount,
				594	pDexFile->indexMap.fieldReducedCount,
				595	pDexFile->indexMap.fieldMap[0]);
				596	LOGI("String: %u %u %u\n",
				597	pDexFile->indexMap.stringFullCount,
				598	pDexFile->indexMap.stringReducedCount,
				599	pDexFile->indexMap.stringMap[0]);
				600
				601	return true;
				602	}
				603
				604	/*
				605	* Parse some auxillary data tables.
				606	*
				607	* v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
				608	* table. Subsequent versions switched to the "chunk" format.
				609	*/
				610	static bool parseAuxData(const u1* data, DexFile* pDexFile)
				611	{
				612	const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
				613	u4 indexMapType = 0;
				614
				615	/* v1.0 format? */
				616	if (*pAux == 0) {
				617	LOGV("+++ found OLD dex format\n");
				618	pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
				619	return true;
				620	}
				621	LOGV("+++ found NEW dex format\n");
				622
				623	/* process chunks until we see the end marker */
				624	while (*pAux != kDexChunkEnd) {
				625	u4 size = *(pAux+1);
				626	u1* data = (u1*) (pAux + 2);
				627
				628	switch (*pAux) {
				629	case kDexChunkClassLookup:
				630	pDexFile->pClassLookup = (const DexClassLookup*) data;
				631	break;
				632	case kDexChunkReducingIndexMap:
				633	LOGI("+++ found reducing index map, size=%u\n", size);
				634	if (!parseIndexMap(pDexFile, data, size, false)) {
				635	LOGE("Failed parsing reducing index map\n");
				636	return false;
				637	}
				638	indexMapType = *pAux;
				639	break;
				640	case kDexChunkExpandingIndexMap:
				641	LOGI("+++ found expanding index map, size=%u\n", size);
				642	if (!parseIndexMap(pDexFile, data, size, true)) {
				643	LOGE("Failed parsing expanding index map\n");
				644	return false;
				645	}
				646	indexMapType = *pAux;
				647	break;
The Android Open Source Project	9940988	2009-03-18 22:20:24 -0700	[diff] [blame]	648	case kDexChunkRegisterMaps:
				649	LOGV("+++ found register maps, size=%u\n", size);
				650	pDexFile->pRegisterMapPool = data;
				651	break;
The Android Open Source Project	f6c3871	2009-03-03 19:28:47 -0800	[diff] [blame]	652	default:
				653	LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
				654	*pAux,
				655	(char) ((pAux) >> 24), (char) ((pAux) >> 16),
				656	(char) ((pAux) >> 8), (char) (pAux),
				657	size);
				658	break;
				659	}
				660
				661	/*
				662	* Advance pointer, padding to 64-bit boundary. The extra "+8" is
				663	* for the type/size header.
				664	*/
				665	size = (size + 8 + 7) & ~7;
				666	pAux += size / sizeof(u4);
				667	}
				668
				669	#if 0 // TODO: propagate expected map type from the VM through the API
				670	/*
				671	* If we're configured to expect an index map, and we don't find one,
				672	* reject this DEX so we'll regenerate it. Also, if we found an
				673	* "expanding" map but we're not configured to use it, we have to fail
				674	* because the constants aren't usable without translation.
				675	*/
				676	if (indexMapType != expectedIndexMapType) {
				677	LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
				678	indexMapType, DVM_REDUCE_CONSTANTS);
				679	return false;
				680	}
				681	#endif
				682
				683	return true;
				684	}
				685
				686	/*
				687	* Parse an optimized or unoptimized .dex file sitting in memory. This is
				688	* called after the byte-ordering and structure alignment has been fixed up.
				689	*
				690	* On success, return a newly-allocated DexFile.
				691	*/
				692	DexFile* dexFileParse(const u1* data, size_t length, int flags)
				693	{
				694	DexFile* pDexFile = NULL;
				695	const DexHeader* pHeader;
				696	const u1* magic;
				697	int result = -1;
				698
				699	if (length < sizeof(DexHeader)) {
				700	LOGE("too short to be a valid .dex\n");
				701	goto bail; /* bad file format */
				702	}
				703
				704	pDexFile = (DexFile*) malloc(sizeof(DexFile));
				705	if (pDexFile == NULL)
				706	goto bail; /* alloc failure */
				707	memset(pDexFile, 0, sizeof(DexFile));
				708
				709	/*
				710	* Peel off the optimized header.
				711	*/
				712	if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
				713	magic = data;
				714	if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
				715	LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
				716	magic[4], magic[5], magic[6], magic[7]);
				717	goto bail;
				718	}
				719
				720	pDexFile->pOptHeader = (const DexOptHeader*) data;
				721	LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
				722	pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
				723
				724	/* locate some auxillary data tables */
				725	if (!parseAuxData(data, pDexFile))
				726	goto bail;
				727
				728	/* ignore the opt header and appended data from here on out */
				729	data += pDexFile->pOptHeader->dexOffset;
				730	length -= pDexFile->pOptHeader->dexOffset;
				731	if (pDexFile->pOptHeader->dexLength > length) {
				732	LOGE("File truncated? stored len=%d, rem len=%d\n",
				733	pDexFile->pOptHeader->dexLength, (int) length);
				734	goto bail;
				735	}
				736	length = pDexFile->pOptHeader->dexLength;
				737	}
				738
				739	dexFileSetupBasicPointers(pDexFile, data);
				740	pHeader = pDexFile->pHeader;
				741
				742	magic = pHeader->magic;
				743	if (memcmp(magic, DEX_MAGIC, 4) != 0) {
				744	/* not expected */
				745	LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
				746	magic[0], magic[1], magic[2], magic[3]);
				747	goto bail;
				748	}
				749	if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
				750	LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
				751	magic[4], magic[5], magic[6], magic[7]);
				752	goto bail;
				753	}
				754
				755	/*
				756	* Verify the checksum. This is reasonably quick, but does require
				757	* touching every byte in the DEX file. The checksum changes after
				758	* byte-swapping and DEX optimization.
				759	*/
				760	if (flags & kDexParseVerifyChecksum) {
				761	u4 adler = dexComputeChecksum(pHeader);
				762	if (adler != pHeader->checksum) {
				763	LOGE("ERROR: bad checksum (%08x vs %08x)\n",
				764	adler, pHeader->checksum);
				765	if (!(flags & kDexParseContinueOnError))
				766	goto bail;
				767	} else {
				768	LOGV("+++ adler32 checksum (%08x) verified\n", adler);
				769	}
				770	}
				771
				772	/*
				773	* Verify the SHA-1 digest. (Normally we don't want to do this --
				774	* the digest is used to uniquely identify a DEX file, and can't be
				775	* computed post-optimization.)
				776	*
				777	* The digest will be invalid after byte swapping and DEX optimization.
				778	*/
				779	if (kVerifySignature) {
				780	unsigned char sha1Digest[kSHA1DigestLen];
				781	const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
				782	kSHA1DigestLen;
				783
				784	dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
				785	if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
				786	char tmpBuf1[kSHA1DigestOutputLen];
				787	char tmpBuf2[kSHA1DigestOutputLen];
				788	LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
				789	dexSHA1DigestToStr(sha1Digest, tmpBuf1),
				790	dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
				791	if (!(flags & kDexParseContinueOnError))
				792	goto bail;
				793	} else {
				794	LOGV("+++ sha1 digest verified\n");
				795	}
				796	}
				797
				798	if (pHeader->fileSize != length) {
				799	LOGE("ERROR: stored file size (%d) != expected (%d)\n",
				800	(int) pHeader->fileSize, (int) length);
				801	if (!(flags & kDexParseContinueOnError))
				802	goto bail;
				803	}
				804
				805	if (pHeader->classDefsSize == 0) {
				806	LOGE("ERROR: DEX file has no classes in it, failing\n");
				807	goto bail;
				808	}
				809
				810	/*
				811	* Success!
				812	*/
				813	result = 0;
				814
				815	bail:
				816	if (result != 0 && pDexFile != NULL) {
				817	dexFileFree(pDexFile);
				818	pDexFile = NULL;
				819	}
				820	return pDexFile;
				821	}
				822
				823	/*
				824	* Free up the DexFile and any associated data structures.
				825	*
				826	* Note we may be called with a partially-initialized DexFile.
				827	*/
				828	void dexFileFree(DexFile* pDexFile)
				829	{
				830	if (pDexFile == NULL)
				831	return;
				832
				833	free(pDexFile);
				834	}
				835
				836	/*
				837	* Look up a class definition entry by descriptor.
				838	*
				839	* "descriptor" should look like "Landroid/debug/Stuff;".
				840	*/
				841	const DexClassDef* dexFindClass(const DexFile* pDexFile,
				842	const char* descriptor)
				843	{
				844	const DexClassLookup* pLookup = pDexFile->pClassLookup;
				845	u4 hash;
				846	int idx, mask;
				847
				848	hash = classDescriptorHash(descriptor);
				849	mask = pLookup->numEntries - 1;
				850	idx = hash & mask;
				851
				852	/*
				853	* Search until we find a matching entry or an empty slot.
				854	*/
				855	while (true) {
				856	int offset;
				857
				858	offset = pLookup->table[idx].classDescriptorOffset;
				859	if (offset == 0)
				860	return NULL;
				861
				862	if (pLookup->table[idx].classDescriptorHash == hash) {
				863	const char* str;
				864
				865	str = (const char*) (pDexFile->baseAddr + offset);
				866	if (strcmp(str, descriptor) == 0) {
				867	return (const DexClassDef*)
				868	(pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
				869	}
				870	}
				871
				872	idx = (idx + 1) & mask;
				873	}
				874	}
				875
				876
				877	/*
				878	* Compute the DEX file checksum for a memory-mapped DEX file.
				879	*/
				880	u4 dexComputeChecksum(const DexHeader* pHeader)
				881	{
				882	const u1* start = (const u1*) pHeader;
				883
				884	uLong adler = adler32(0L, Z_NULL, 0);
				885	const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
				886
				887	return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
				888	}
				889
				890
				891	/*
Andy McFadden	b51ea11	2009-05-08 16:50:17 -0700	[diff] [blame^]	892	* Compute the size, in bytes, of a DexCode.
				893	*/
				894	size_t dexGetDexCodeSize(const DexCode* pCode)
				895	{
				896	/*
				897	* The catch handler data is the last entry. It has a variable number
				898	* of variable-size pieces, so we need to create an iterator.
				899	*/
				900	u4 handlersSize;
				901	u4 offset;
				902	u4 ui;
				903
				904	if (pCode->triesSize != 0) {
				905	handlersSize = dexGetHandlersSize(pCode);
				906	offset = dexGetFirstHandlerOffset(pCode);
				907	} else {
				908	handlersSize = 0;
				909	offset = 0;
				910	}
				911
				912	for (ui = 0; ui < handlersSize; ui++) {
				913	DexCatchIterator iterator;
				914	dexCatchIteratorInit(&iterator, pCode, offset);
				915	offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
				916	}
				917
				918	const u1* handlerData = dexGetCatchHandlerData(pCode);
				919
				920	//LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
				921	// pCode, handlerData, offset);
				922
				923	/* return the size of the catch handler + everything before it */
				924	return (handlerData - (u1*) pCode) + offset;
				925	}
				926
				927
				928	/*
The Android Open Source Project	f6c3871	2009-03-03 19:28:47 -0800	[diff] [blame]	929	* ===========================================================================
				930	* Debug info
				931	* ===========================================================================
				932	*/
				933
				/*
				 * Advance from the start of a class name to its ';' terminator, or to
				 * the end of the string if the terminator is missing.
				 */
				static const char* skipToDescriptorEnd(const char* sig)
				{
				    while (*sig != ';' && *sig != '\0')
				        sig++;
				    return sig;
				}

				/*
				 * Decode the arguments in a method signature, which looks something
				 * like "(ID[Ljava/lang/String;)V".
				 *
				 * Returns the type signature letter for the next argument, or ')' if
				 * there are no more args. Advances "pSig" to point to the character
				 * after the one returned. Object references return 'L' and arrays
				 * return '['; in both cases the whole type, including any class name
				 * and its ';', is skipped.
				 *
				 * At the end of the string '\0' is returned and "pSig" is left alone.
				 * A truncated type (missing ';' or element type) leaves "pSig" at the
				 * string terminator rather than beyond it.
				 */
				static char decodeSignature(const char** pSig)
				{
				    const char* sig = *pSig;
				    char type;

				    if (*sig == '(')
				        sig++;

				    if (*sig == '\0')
				        return *sig;    /* don't advance further */

				    type = *sig;

				    if (type == '[') {
				        /* array; advance past every dimension to the element type */
				        while (*++sig == '[')
				            ;
				    }

				    if (*sig == 'L') {
				        /* object ref (or array of them); stop on the ';' */
				        sig = skipToDescriptorEnd(sig);
				    }

				    /* step over the last character of the type, but never past '\0' */
				    *pSig = (*sig == '\0') ? sig : sig + 1;
				    return type;
				}
				973
				/*
				 * Returns the length of a type string, given the start of the
				 * type string. Used for the case where the debug info format
				 * references types that are inside a method type signature.
				 *
				 * The length is however far decodeSignature() advances from "type".
				 * Assumes any leading '(' has already been gobbled.
				 */
				static int typeLength (const char *type) {
				    const char *cursor = type;

				    /* Only the pointer advance matters; the type letter is ignored. */
				    (void) decodeSignature(&cursor);

				    return cursor - type;
				}
				985
				986	/*
				987	* Reads a string index as encoded for the debug info format,
				988	* returning a string pointer or NULL as appropriate.
				989	*/
				990	static const char* readStringIdx(const DexFile* pDexFile,
				991	const u1** pStream) {
				992	u4 stringIdx = readUnsignedLeb128(pStream);
				993
				994	// Remember, encoded string indicies have 1 added to them.
				995	if (stringIdx == 0) {
				996	return NULL;
				997	} else {
				998	return dexStringById(pDexFile, stringIdx - 1);
				999	}
				1000	}
				1001
				1002	/*
				1003	* Reads a type index as encoded for the debug info format, returning
				1004	* a string pointer for its descriptor or NULL as appropriate.
				1005	*/
				1006	static const char* readTypeIdx(const DexFile* pDexFile,
				1007	const u1** pStream) {
				1008	u4 typeIdx = readUnsignedLeb128(pStream);
				1009
				1010	// Remember, encoded type indicies have 1 added to them.
				1011	if (typeIdx == 0) {
				1012	return NULL;
				1013	} else {
				1014	return dexStringByTypeIdx(pDexFile, typeIdx - 1);
				1015	}
				1016	}
				1017
/*
 * access_flag value indicating that a method is static.  A method whose
 * flags lack this bit is treated by dexDecodeDebugInfo() as having an
 * implicit "this" in its first argument register.
 */
#define ACC_STATIC 0x0008
				1020
/*
 * Per-register bookkeeping used while decoding a debug info stream:
 * describes the local variable most recently associated with a register.
 */
typedef struct LocalInfo {
    const char *name;           /* variable name ("this" for the receiver) */
    const char *descriptor;     /* type descriptor of the variable */
    const char *signature;      /* extended signature, or NULL if none given */
    u2 startAddress;            /* address where the current range began */
    bool live;                  /* true while a range is open for the register */
} LocalInfo;
				1028
				1029	static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
				1030	LocalInfo *localInReg, DexDebugNewLocalCb localCb)
				1031	{
				1032	if (localCb != NULL && localInReg[reg].live) {
				1033	localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
				1034	localInReg[reg].name,
				1035	localInReg[reg].descriptor,
				1036	localInReg[reg].signature == NULL
				1037	? "" : localInReg[reg].signature );
				1038	}
				1039	}
				1040
				1041	// TODO optimize localCb == NULL case
				1042	void dexDecodeDebugInfo(
				1043	const DexFile* pDexFile,
				1044	const DexCode* pCode,
				1045	const char* classDescriptor,
				1046	u4 protoIdx,
				1047	u4 accessFlags,
				1048	DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
				1049	void* cnxt)
				1050	{
				1051	const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
				1052	u4 line;
				1053	u4 parametersSize;
				1054	u4 address = 0;
				1055	LocalInfo localInReg[pCode->registersSize];
				1056	u4 insnsSize = pCode->insnsSize;
				1057	DexProto proto = { pDexFile, protoIdx };
				1058
				1059	memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
				1060
				1061	if (stream == NULL) {
				1062	goto end;
				1063	}
				1064
				1065	line = readUnsignedLeb128(&stream);
				1066	parametersSize = readUnsignedLeb128(&stream);
				1067
				1068	u2 argReg = pCode->registersSize - pCode->insSize;
				1069
				1070	if ((accessFlags & ACC_STATIC) == 0) {
				1071	/*
				1072	* The code is an instance method, which means that there is
				1073	* an initial this parameter. Also, the proto list should
				1074	* contain exactly one fewer argument word than the insSize
				1075	* indicates.
				1076	*/
				1077	assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
				1078	localInReg[argReg].name = "this";
				1079	localInReg[argReg].descriptor = classDescriptor;
				1080	localInReg[argReg].startAddress = 0;
				1081	localInReg[argReg].live = true;
				1082	argReg++;
				1083	} else {
				1084	assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
				1085	}
				1086
				1087	DexParameterIterator iterator;
				1088	dexParameterIteratorInit(&iterator, &proto);
				1089
				1090	while (parametersSize-- != 0) {
				1091	const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
				1092	const char *name;
				1093	int reg;
				1094
				1095	if ((argReg >= pCode->registersSize) \|\| (descriptor == NULL)) {
				1096	goto invalid_stream;
				1097	}
				1098
				1099	name = readStringIdx(pDexFile, &stream);
				1100	reg = argReg;
				1101
				1102	switch (descriptor[0]) {
				1103	case 'D':
				1104	case 'J':
				1105	argReg += 2;
				1106	break;
				1107	default:
				1108	argReg += 1;
				1109	break;
				1110	}
				1111
				1112	if (name != NULL) {
				1113	localInReg[reg].name = name;
				1114	localInReg[reg].descriptor = descriptor;
				1115	localInReg[reg].signature = NULL;
				1116	localInReg[reg].startAddress = address;
				1117	localInReg[reg].live = true;
				1118	}
				1119	}
				1120
				1121	for (;;) {
				1122	u1 opcode = *stream++;
				1123	u2 reg;
				1124
				1125	switch (opcode) {
				1126	case DBG_END_SEQUENCE:
				1127	goto end;
				1128
				1129	case DBG_ADVANCE_PC:
				1130	address += readUnsignedLeb128(&stream);
				1131	break;
				1132
				1133	case DBG_ADVANCE_LINE:
				1134	line += readSignedLeb128(&stream);
				1135	break;
				1136
				1137	case DBG_START_LOCAL:
				1138	case DBG_START_LOCAL_EXTENDED:
				1139	reg = readUnsignedLeb128(&stream);
				1140	if (reg > pCode->registersSize) goto invalid_stream;
				1141
				1142	// Emit what was previously there, if anything
				1143	emitLocalCbIfLive (cnxt, reg, address,
				1144	localInReg, localCb);
				1145
				1146	localInReg[reg].name = readStringIdx(pDexFile, &stream);
				1147	localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
				1148	if (opcode == DBG_START_LOCAL_EXTENDED) {
				1149	localInReg[reg].signature
				1150	= readStringIdx(pDexFile, &stream);
				1151	} else {
				1152	localInReg[reg].signature = NULL;
				1153	}
				1154	localInReg[reg].startAddress = address;
				1155	localInReg[reg].live = true;
				1156	break;
				1157
				1158	case DBG_END_LOCAL:
				1159	reg = readUnsignedLeb128(&stream);
				1160	if (reg > pCode->registersSize) goto invalid_stream;
				1161
				1162	emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
				1163	localInReg[reg].live = false;
				1164	break;
				1165
				1166	case DBG_RESTART_LOCAL:
				1167	reg = readUnsignedLeb128(&stream);
				1168	if (reg > pCode->registersSize) goto invalid_stream;
				1169
				1170	if (localInReg[reg].name == NULL
				1171	\|\| localInReg[reg].descriptor == NULL) {
				1172	goto invalid_stream;
				1173	}
				1174
				1175	/*
				1176	* If the register is live, the "restart" is superfluous,
				1177	* and we don't want to mess with the existing start address.
				1178	*/
				1179	if (!localInReg[reg].live) {
				1180	localInReg[reg].startAddress = address;
				1181	localInReg[reg].live = true;
				1182	}
				1183	break;
				1184
				1185	case DBG_SET_PROLOGUE_END:
				1186	case DBG_SET_EPILOGUE_BEGIN:
				1187	case DBG_SET_FILE:
				1188	break;
				1189
				1190	default: {
				1191	int adjopcode = opcode - DBG_FIRST_SPECIAL;
				1192
				1193	address += adjopcode / DBG_LINE_RANGE;
				1194	line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
				1195
				1196	if (posCb != NULL) {
				1197	int done;
				1198	done = posCb(cnxt, address, line);
				1199
				1200	if (done) {
				1201	// early exit
				1202	goto end;
				1203	}
				1204	}
				1205	break;
				1206	}
				1207	}
				1208	}
				1209
				1210	end:
				1211	{
				1212	int reg;
				1213	for (reg = 0; reg < pCode->registersSize; reg++) {
				1214	emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
				1215	}
				1216	}
				1217	return;
				1218
				1219	invalid_stream:
				1220	IF_LOGE() {
				1221	char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
				1222	LOGE("Invalid debug info stream. class %s; proto %s",
				1223	classDescriptor, methodDescriptor);
				1224	free(methodDescriptor);
				1225	}
				1226	}
Andy McFadden	b51ea11	2009-05-08 16:50:17 -0700	[diff] [blame^]	1227