auto import from //depot/cupcake/@135843
diff --git a/vm/analysis/CodeVerify.c b/vm/analysis/CodeVerify.c
new file mode 100644
index 0000000..65aa833
--- /dev/null
+++ b/vm/analysis/CodeVerify.c
@@ -0,0 +1,5420 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik bytecode structural verifier. The only public entry point
+ * (except for a few shared utility functions) is dvmVerifyCodeFlow().
+ *
+ * TODO: might benefit from a signature-->class lookup cache. Could avoid
+ * some string-peeling and wouldn't need to compute hashes.
+ *
+ * TODO: we do too much stuff in here that could be done in the static
+ * verification pass. It's convenient, because we have all of the
+ * necessary information, but it's more efficient to do it over in
+ * DexVerify.c because in here we may have to process instructions
+ * multiple times.
+ */
+#include "Dalvik.h"
+#include "analysis/CodeVerify.h"
+#include "analysis/RegisterMap.h"
+#include "libdex/DexCatch.h"
+#include "libdex/InstrUtils.h"
+
+#include <stddef.h>
+
+
+/*
+ * We don't need to store the register data for many instructions, because
+ * we either only need it at branch points (for verification) or GC points
+ * and branches (for verification + type-precise register analysis).
+ */
+typedef enum RegisterTrackingMode {
+ kTrackRegsBranches,
+ kTrackRegsGcPoints,
+ kTrackRegsAll
+} RegisterTrackingMode;
+
+/*
+ * Set this to enable dead code scanning. This is not required, but it's
+ * very useful when testing changes to the verifier (to make sure we're not
+ * skipping over stuff) and for checking the optimized output from "dx".
+ * The only reason not to do it is that it slightly increases the time
+ * required to perform verification.
+ */
+#define DEAD_CODE_SCAN true
+
+static bool gDebugVerbose = false; // TODO: remove this
+
+#if 0
+int gDvm__totalInstr = 0;
+int gDvm__gcInstr = 0;
+int gDvm__gcData = 0;
+int gDvm__gcSimpleData = 0;
+#endif
+
+/*
+ * Selectively enable verbose debug logging -- use this to activate
+ * dumpRegTypes() calls for all instructions in the specified method.
+ */
+static inline bool doVerboseLogging(const Method* meth) {
+    return false; /* COMMENT OUT to enable verbose debugging */
+
+    /*
+     * NOTE: everything below is intentionally unreachable. Removing the
+     * "return false" above narrows verbose logging to the one method
+     * matching the descriptor/name/signature triple below.
+     */
+    const char* cd = "Lop_lshr;";
+    const char* mn = "test";
+    const char* sg = "(II)J";
+    return (strcmp(meth->clazz->descriptor, cd) == 0 &&
+        dvmCompareNameDescriptorAndMethod(mn, sg, meth) == 0);
+}
+
+#define SHOW_REG_DETAILS (0 /*| DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS*/)
+
+/*
+ * We need an extra "pseudo register" to hold the return type briefly. It
+ * can be category 1 or 2, so we need two slots.
+ */
+#define kExtraRegs 2
+#define RESULT_REGISTER(_insnRegCount) (_insnRegCount)
+
+/*
+ * Big fat collection of registers.
+ */
+typedef struct RegisterTable {
+ /*
+ * Array of RegType arrays, one per address in the method. We only
+ * set the pointers for certain addresses, based on what we're trying
+ * to accomplish.
+ */
+ RegType** addrRegs;
+
+ /*
+ * Number of registers we track for each instruction. This is equal
+ * to the method's declared "registersSize" plus kExtraRegs.
+ */
+ int insnRegCountPlus;
+
+ /*
+ * A single large alloc, with all of the storage needed for addrRegs.
+ */
+ RegType* regAlloc;
+} RegisterTable;
+
+
+/* fwd */
+static void checkMergeTab(void);
+static bool isInitMethod(const Method* meth);
+static RegType getInvocationThis(const RegType* insnRegs,\
+ const int insnRegCount, const DecodedInstruction* pDecInsn, bool* pOkay);
+static void verifyRegisterType(const RegType* insnRegs, const int insnRegCount,\
+ u4 vsrc, RegType checkType, bool* pOkay);
+static bool doCodeVerification(const Method* meth, InsnFlags* insnFlags,\
+ RegisterTable* regTable, UninitInstanceMap* uninitMap);
+static bool verifyInstruction(const Method* meth, InsnFlags* insnFlags,\
+ RegisterTable* regTable, RegType* workRegs, int insnIdx,
+ UninitInstanceMap* uninitMap, int* pStartGuess);
+static ClassObject* findCommonSuperclass(ClassObject* c1, ClassObject* c2);
+static void dumpRegTypes(const Method* meth, const InsnFlags* insnFlags,\
+ const RegType* addrRegs, int addr, const char* addrName,
+ const UninitInstanceMap* uninitMap, int displayFlags);
+
+/* bit values for dumpRegTypes() "displayFlags" */
+enum {
+ DRT_SIMPLE = 0,
+ DRT_SHOW_REF_TYPES = 0x01,
+ DRT_SHOW_LOCALS = 0x02,
+};
+
+
+/*
+ * ===========================================================================
+ * RegType and UninitInstanceMap utility functions
+ * ===========================================================================
+ */
+
+#define __ kRegTypeUnknown
+#define _U kRegTypeUninit
+#define _X kRegTypeConflict
+#define _F kRegTypeFloat
+#define _0 kRegTypeZero
+#define _1 kRegTypeOne
+#define _Z kRegTypeBoolean
+#define _b kRegTypePosByte
+#define _B kRegTypeByte
+#define _s kRegTypePosShort
+#define _S kRegTypeShort
+#define _C kRegTypeChar
+#define _I kRegTypeInteger
+#define _J kRegTypeLongLo
+#define _j kRegTypeLongHi
+#define _D kRegTypeDoubleLo
+#define _d kRegTypeDoubleHi
+
+/*
+ * Merge result table for primitive values. The table is symmetric along
+ * the diagonal.
+ *
+ * Note that 32-bit int/float do not merge into 64-bit long/double. This
+ * is a register merge, not a widening conversion. Only the "implicit"
+ * widening within a category, e.g. byte to short, is allowed.
+ *
+ * Because Dalvik does not draw a distinction between int and float, we
+ * have to allow free exchange between 32-bit int/float and 64-bit
+ * long/double.
+ *
+ * Note that Uninit+Uninit=Uninit. This holds true because we only
+ * use this when the RegType value is exactly equal to kRegTypeUninit, which
+ * can only happen for the zeroeth entry in the table.
+ *
+ * "Unknown" never merges with anything known. The only time a register
+ * transitions from "unknown" to "known" is when we're executing code
+ * for the first time, and we handle that with a simple copy.
+ */
+const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX] =
+{
+ /* chk: _ U X F 0 1 Z b B s S C I J j D d */
+ { /*_*/ __,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X },
+ { /*U*/ _X,_U,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X },
+ { /*X*/ _X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X },
+ { /*F*/ _X,_X,_X,_F,_F,_F,_F,_F,_F,_F,_F,_F,_F,_X,_X,_X,_X },
+ { /*0*/ _X,_X,_X,_F,_0,_Z,_Z,_b,_B,_s,_S,_C,_I,_X,_X,_X,_X },
+ { /*1*/ _X,_X,_X,_F,_Z,_1,_Z,_b,_B,_s,_S,_C,_I,_X,_X,_X,_X },
+ { /*Z*/ _X,_X,_X,_F,_Z,_Z,_Z,_b,_B,_s,_S,_C,_I,_X,_X,_X,_X },
+ { /*b*/ _X,_X,_X,_F,_b,_b,_b,_b,_B,_s,_S,_C,_I,_X,_X,_X,_X },
+ { /*B*/ _X,_X,_X,_F,_B,_B,_B,_B,_B,_S,_S,_I,_I,_X,_X,_X,_X },
+ { /*s*/ _X,_X,_X,_F,_s,_s,_s,_s,_S,_s,_S,_C,_I,_X,_X,_X,_X },
+ { /*S*/ _X,_X,_X,_F,_S,_S,_S,_S,_S,_S,_S,_I,_I,_X,_X,_X,_X },
+ { /*C*/ _X,_X,_X,_F,_C,_C,_C,_C,_I,_C,_I,_C,_I,_X,_X,_X,_X },
+ { /*I*/ _X,_X,_X,_F,_I,_I,_I,_I,_I,_I,_I,_I,_I,_X,_X,_X,_X },
+ { /*J*/ _X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_J,_X,_J,_X },
+ { /*j*/ _X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_j,_X,_j },
+ { /*D*/ _X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_J,_X,_D,_X },
+ { /*d*/ _X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_X,_j,_X,_d },
+};
+
+#undef __
+#undef _U
+#undef _X
+#undef _F
+#undef _0
+#undef _1
+#undef _Z
+#undef _b
+#undef _B
+#undef _s
+#undef _S
+#undef _C
+#undef _I
+#undef _J
+#undef _j
+#undef _D
+#undef _d
+
+#ifndef NDEBUG
+/*
+ * Verify symmetry in the merge table: merge(a,b) must equal merge(b,a).
+ * Logs and aborts the VM on any violation.
+ */
+static void checkMergeTab(void)
+{
+    int row, col;
+
+    for (row = 0; row < kRegTypeMAX; row++) {
+        for (col = row; col < kRegTypeMAX; col++) {
+            if (gDvmMergeTab[row][col] != gDvmMergeTab[col][row]) {
+                LOGE("Symmetry violation: %d,%d vs %d,%d\n",
+                    row, col, col, row);
+                dvmAbort();
+            }
+        }
+    }
+}
+#endif
+
+/*
+ * Determine whether we can convert "srcType" to "checkType", where
+ * "checkType" is one of the category-1 non-reference types.
+ *
+ * 32-bit int and float are interchangeable.
+ */
+static bool canConvertTo1nr(RegType srcType, RegType checkType)
+{
+    /*
+     * [src][check] table of permitted conversions, indexed relative to
+     * kRegType1nrSTART. Everything converts to float and int (columns
+     * F and I); char and short do not interconvert because their
+     * sign-extension behavior differs.
+     */
+    static const char convTab
+        [kRegType1nrEND-kRegType1nrSTART+1][kRegType1nrEND-kRegType1nrSTART+1] =
+    {
+        /* chk: F  0  1  Z  b  B  s  S  C  I */
+        { /*F*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+        { /*0*/ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+        { /*1*/ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
+        { /*Z*/ 1, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
+        { /*b*/ 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 },
+        { /*B*/ 1, 0, 0, 0, 0, 1, 0, 1, 0, 1 },
+        { /*s*/ 1, 0, 0, 0, 0, 0, 1, 1, 1, 1 },
+        { /*S*/ 1, 0, 0, 0, 0, 0, 0, 1, 0, 1 },
+        { /*C*/ 1, 0, 0, 0, 0, 0, 0, 0, 1, 1 },
+        { /*I*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+    };
+
+    assert(checkType >= kRegType1nrSTART && checkType <= kRegType1nrEND);
+#if 0
+    if (checkType < kRegType1nrSTART || checkType > kRegType1nrEND) {
+        LOG_VFY("Unexpected checkType %d (srcType=%d)\n", checkType, srcType);
+        assert(false);
+        return false;
+    }
+#endif
+
+    //printf("convTab[%d][%d] = %d\n", srcType, checkType,
+    //    convTab[srcType-kRegType1nrSTART][checkType-kRegType1nrSTART]);
+    /* anything outside the 1nr range (refs, wide types, unknown) fails */
+    if (srcType >= kRegType1nrSTART && srcType <= kRegType1nrEND)
+        return (bool) convTab[srcType-kRegType1nrSTART][checkType-kRegType1nrSTART];
+
+    return false;
+}
+
+/*
+ * Determine whether the types are compatible as 64-bit values. In
+ * Dalvik, doubles and longs are interchangeable.
+ */
+static bool canConvertTo2(RegType srcType, RegType checkType)
+{
+    /* both sides must be a category-2 "low half" type */
+    bool srcIsWide = (srcType == kRegTypeLongLo || srcType == kRegTypeDoubleLo);
+    bool chkIsWide = (checkType == kRegTypeLongLo || checkType == kRegTypeDoubleLo);
+    return srcIsWide && chkIsWide;
+}
+
+/*
+ * Determine whether or not "instrType" and "targetType" are compatible,
+ * for purposes of getting or setting a value in a field or array. The
+ * idea is that an instruction with a category 1nr type (say, aget-short
+ * or iput-boolean) is accessing a static field, instance field, or array
+ * entry, and we want to make sure that the operation is legal.
+ *
+ * At a minimum, source and destination must have the same width. We
+ * further refine this to assert that "short" and "char" are not
+ * compatible, because the sign-extension is different on the "get"
+ * operations. As usual, "float" and "int" are interoperable.
+ *
+ * We're not considering the actual contents of the register, so we'll
+ * never get "pseudo-types" like kRegTypeZero or kRegTypePosShort. We
+ * could get kRegTypeUnknown in "targetType" if a field or array class
+ * lookup failed. Category 2 types and references are checked elsewhere.
+ */
+static bool checkFieldArrayStore1nr(RegType instrType, RegType targetType)
+{
+    /* exact match covers the overwhelmingly common case */
+    if (instrType == targetType)
+        return true;
+
+    /* the only permitted mismatch: int <-> float, in either direction */
+    bool intFloatMix =
+        (instrType == kRegTypeInteger && targetType == kRegTypeFloat) ||
+        (instrType == kRegTypeFloat && targetType == kRegTypeInteger);
+    return intFloatMix;
+}
+
+/*
+ * Convert a VM PrimitiveType enum value to the equivalent RegType value.
+ *
+ * Returns kRegTypeUnknown (after asserting in debug builds) for values
+ * with no table entry, i.e. PRIM_NOT (-1), PRIM_VOID, or garbage.
+ */
+static RegType primitiveTypeToRegType(PrimitiveType primType)
+{
+    /* static const: built once, never mutated */
+    static const struct {
+        RegType regType;        /* type equivalent */
+        PrimitiveType primType; /* verification */
+    } convTab[] = {
+        /* must match order of enum in Object.h */
+        { kRegTypeBoolean, PRIM_BOOLEAN },
+        { kRegTypeChar, PRIM_CHAR },
+        { kRegTypeFloat, PRIM_FLOAT },
+        { kRegTypeDoubleLo, PRIM_DOUBLE },
+        { kRegTypeByte, PRIM_BYTE },
+        { kRegTypeShort, PRIM_SHORT },
+        { kRegTypeInteger, PRIM_INT },
+        { kRegTypeLongLo, PRIM_LONG },
+        // PRIM_VOID
+    };
+
+    /*
+     * Bounds check. This must be ">=": the original ">" allowed
+     * primType equal to the table length (e.g. PRIM_VOID) through,
+     * reading one entry past the end of convTab.
+     */
+    if (primType < 0 || primType >= (int) (sizeof(convTab) / sizeof(convTab[0])))
+    {
+        assert(false);
+        return kRegTypeUnknown;
+    }
+
+    assert(convTab[primType].primType == primType);
+    return convTab[primType].regType;
+}
+
+/*
+ * Create a new uninitialized instance map.
+ *
+ * The map is allocated and populated with address entries. The addresses
+ * appear in ascending order to allow binary searching.
+ *
+ * Very few methods have 10 or more new-instance instructions; the
+ * majority have 0 or 1. Occasionally a static initializer will have 200+.
+ *
+ * Returns NULL on allocation failure.
+ */
+UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
+    const InsnFlags* insnFlags, int newInstanceCount)
+{
+    const int insnsSize = dvmGetMethodInsnsSize(meth);
+    const u2* insns = meth->insns;
+    UninitInstanceMap* uninitMap;
+    bool isInit = false;
+    int idx, addr;
+
+    /* constructors get one extra slot, for the uninitialized "this" */
+    if (isInitMethod(meth)) {
+        newInstanceCount++;
+        isInit = true;
+    }
+
+    /*
+     * Allocate the header and map as a single unit.
+     *
+     * TODO: consider having a static instance so we can avoid allocations.
+     * I don't think the verifier is guaranteed to be single-threaded when
+     * running in the VM (rather than dexopt), so that must be taken into
+     * account.
+     */
+    int size = offsetof(UninitInstanceMap, map) +
+                newInstanceCount * sizeof(uninitMap->map[0]);
+    uninitMap = calloc(1, size);        /* zero-fill: clazz slots start NULL */
+    if (uninitMap == NULL)
+        return NULL;
+    uninitMap->numEntries = newInstanceCount;
+
+    idx = 0;
+    /* the "this" slot is keyed by the special kUninitThisArgAddr address */
+    if (isInit) {
+        uninitMap->map[idx++].addr = kUninitThisArgAddr;
+    }
+
+    /*
+     * Run through and find the new-instance instructions.
+     */
+    for (addr = 0; addr < insnsSize; /**/) {
+        int width = dvmInsnGetWidth(insnFlags, addr);
+
+        /* opcode is in the low byte of the first code unit */
+        if ((*insns & 0xff) == OP_NEW_INSTANCE)
+            uninitMap->map[idx++].addr = addr;
+
+        addr += width;
+        insns += width;
+    }
+
+    assert(idx == newInstanceCount);
+    return uninitMap;
+}
+
+/*
+ * Free the map. The header and entries were allocated as a single unit,
+ * so one free() releases everything; passing NULL is a no-op.
+ */
+void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap)
+{
+    free(uninitMap);
+}
+
+/*
+ * Set the class object associated with the instruction at "addr".
+ *
+ * Returns the map slot index, or -1 if the address isn't listed in the map
+ * (shouldn't happen) or if a *different* class is already associated with
+ * the address (bad bytecode). Re-setting the same class is a no-op.
+ *
+ * Entries, once set, do not change -- a given address can only allocate
+ * one type of object.
+ */
+int dvmSetUninitInstance(UninitInstanceMap* uninitMap, int addr,
+    ClassObject* clazz)
+{
+    int idx;
+
+    assert(clazz != NULL);
+
+    /* linear scan, back to front; maps are typically tiny */
+    /* TODO: binary search when numEntries > 8 */
+    for (idx = uninitMap->numEntries - 1; idx >= 0; idx--) {
+        if (uninitMap->map[idx].addr == addr) {
+            if (uninitMap->map[idx].clazz != NULL &&
+                uninitMap->map[idx].clazz != clazz)
+            {
+                LOG_VFY("VFY: addr %d already set to %p, not setting to %p\n",
+                    addr, uninitMap->map[idx].clazz, clazz);
+                return -1;          // already set to something else??
+            }
+            uninitMap->map[idx].clazz = clazz;
+            return idx;
+        }
+    }
+
+    LOG_VFY("VFY: addr %d not found in uninit map\n", addr);
+    assert(false);      // shouldn't happen
+    return -1;
+}
+
+/*
+ * Get the class object at the specified map slot index.
+ *
+ * Returns NULL if no class has been associated with the slot yet
+ * (entries start out zeroed by the calloc in dvmCreateUninitInstanceMap).
+ */
+ClassObject* dvmGetUninitInstance(const UninitInstanceMap* uninitMap, int idx)
+{
+    assert(idx >= 0 && idx < uninitMap->numEntries);
+    return uninitMap->map[idx].clazz;
+}
+
+/*
+ * Determine if "type" is actually an object reference (init/uninit/zero).
+ *
+ * Initialized references are encoded as ClassObject pointers (see
+ * regTypeFromClass), which always compare greater than kRegTypeMAX;
+ * kRegTypeZero (null) and the uninit marker also count as references.
+ */
+static inline bool regTypeIsReference(RegType type) {
+    return (type > kRegTypeMAX || type == kRegTypeUninit ||
+        type == kRegTypeZero);
+}
+
+/*
+ * Determine if "type" is an uninitialized object reference: the bits
+ * under kRegTypeUninitMask match the kRegTypeUninit marker (the
+ * remaining bits carry the uninit-map slot index).
+ */
+static inline bool regTypeIsUninitReference(RegType type) {
+    return ((type & kRegTypeUninitMask) == kRegTypeUninit);
+}
+
+/*
+ * Convert the initialized reference "type" to a ClassObject pointer.
+ * Does not expect uninit ref types or "zero" (null).
+ *
+ * Returns NULL if the value is actually an uninit marker rather than an
+ * aligned ClassObject pointer (pointers have the low bit clear).
+ */
+static ClassObject* regTypeInitializedReferenceToClass(RegType type)
+{
+    assert(regTypeIsReference(type) && type != kRegTypeZero);
+    if ((type & 0x01) == 0) {
+        return (ClassObject*) type;
+    } else {
+        //LOG_VFY("VFY: attempted to use uninitialized reference\n");
+        return NULL;
+    }
+}
+
+/*
+ * Extract the index into the uninitialized instance map table
+ * (inverse of regTypeFromUninitIndex).
+ */
+static inline int regTypeToUninitIndex(RegType type) {
+    assert(regTypeIsUninitReference(type));
+    return (type & ~kRegTypeUninitMask) >> kRegTypeUninitShift;
+}
+
+/*
+ * Convert the reference "type" to a ClassObject pointer, resolving
+ * uninitialized references through the uninit instance map. Does not
+ * expect "zero" (null).
+ */
+static ClassObject* regTypeReferenceToClass(RegType type,
+    const UninitInstanceMap* uninitMap)
+{
+    assert(regTypeIsReference(type) && type != kRegTypeZero);
+    if (regTypeIsUninitReference(type)) {
+        assert(uninitMap != NULL);
+        return dvmGetUninitInstance(uninitMap, regTypeToUninitIndex(type));
+    } else {
+        /* initialized refs are the ClassObject pointer itself */
+        return (ClassObject*) type;
+    }
+}
+
+/*
+ * Convert the ClassObject pointer to an (initialized) register type.
+ * The pointer value itself is the encoding.
+ */
+static inline RegType regTypeFromClass(ClassObject* clazz) {
+    return (u4) clazz;
+}
+
+/*
+ * Return the RegType for the uninitialized reference in slot "uidx":
+ * the slot index shifted up past the kRegTypeUninit marker bits.
+ */
+static RegType regTypeFromUninitIndex(int uidx) {
+    return (u4) (kRegTypeUninit | (uidx << kRegTypeUninitShift));
+}
+
+
+/*
+ * ===========================================================================
+ * Signature operations
+ * ===========================================================================
+ */
+
+/*
+ * Is this method a constructor? Constructors are named exactly "<init>".
+ */
+static bool isInitMethod(const Method* meth)
+{
+    return strcmp(meth->name, "<init>") == 0;
+}
+
+/*
+ * Is this method a class initializer? These are named exactly "<clinit>".
+ */
+static bool isClassInitMethod(const Method* meth)
+{
+    return strcmp(meth->name, "<clinit>") == 0;
+}
+
+/*
+ * Look up a class reference given as a simple string descriptor.
+ *
+ * On lookup failure this tries to substitute a usable stand-in (base
+ * object-array type, or java.lang.Object) so verification can continue;
+ * when no substitute applies it sets "*pOkay" to false. May return NULL.
+ */
+static ClassObject* lookupClassByDescriptor(const Method* meth,
+    const char* pDescriptor, bool* pOkay)
+{
+    /*
+     * The javac compiler occasionally puts references to nonexistent
+     * classes in signatures. For example, if you have a non-static
+     * inner class with no constructor, the compiler provides
+     * a private <init> for you. Constructing the class
+     * requires <init>(parent), but the outer class can't call
+     * that because the method is private. So the compiler
+     * generates a package-scope <init>(parent,bogus) method that
+     * just calls the regular <init> (the "bogus" part being necessary
+     * to distinguish the signature of the synthetic method).
+     * Treating the bogus class as an instance of java.lang.Object
+     * allows the verifier to process the class successfully.
+     */
+
+    //LOGI("Looking up '%s'\n", typeStr);
+    ClassObject* clazz;
+    clazz = dvmFindClassNoInit(pDescriptor, meth->clazz->classLoader);
+    if (clazz == NULL) {
+        dvmClearOptException(dvmThreadSelf());
+        /* '$' suggests a synthetic inner-class ref; log less loudly */
+        if (strchr(pDescriptor, '$') != NULL) {
+            LOGV("VFY: unable to find class referenced in signature (%s)\n",
+                pDescriptor);
+        } else {
+            LOG_VFY("VFY: unable to find class referenced in signature (%s)\n",
+                pDescriptor);
+        }
+
+        if (pDescriptor[0] == '[') {
+            /* We are looking at an array descriptor. */
+
+            /*
+             * There should never be a problem loading primitive arrays.
+             */
+            if (pDescriptor[1] != 'L' && pDescriptor[1] != '[') {
+                LOG_VFY("VFY: invalid char in signature in '%s'\n",
+                    pDescriptor);
+                *pOkay = false;
+            }
+
+            /*
+             * Try to continue with base array type. This will let
+             * us pass basic stuff (e.g. get array len) that wouldn't
+             * fly with an Object. This is NOT correct if the
+             * missing type is a primitive array, but we should never
+             * have a problem loading those. (I'm not convinced this
+             * is correct or even useful. Just use Object here?)
+             */
+            clazz = dvmFindClassNoInit("[Ljava/lang/Object;",
+                meth->clazz->classLoader);
+        } else if (pDescriptor[0] == 'L') {
+            /*
+             * We are looking at a non-array reference descriptor;
+             * try to continue with base reference type.
+             */
+            clazz = gDvm.classJavaLangObject;
+        } else {
+            /* We are looking at a primitive type. */
+            LOG_VFY("VFY: invalid char in signature in '%s'\n", pDescriptor);
+            *pOkay = false;
+        }
+
+        if (clazz == NULL) {
+            *pOkay = false;
+        }
+    }
+
+    /*
+     * Fix: guard against NULL before dvmIsPrimitiveClass. On the
+     * primitive-descriptor failure path above, "clazz" is still NULL
+     * here, and the original unconditional check dereferenced it.
+     */
+    if (clazz != NULL && dvmIsPrimitiveClass(clazz)) {
+        LOG_VFY("VFY: invalid use of primitive type '%s'\n", pDescriptor);
+        *pOkay = false;
+        clazz = NULL;
+    }
+
+    return clazz;
+}
+
+/*
+ * Look up a class reference in a signature. Could be an arg or the
+ * return value.
+ *
+ * Advances "*pSig" to the last character in the signature (that is, to
+ * the ';').
+ *
+ * NOTE: this is also expected to verify the signature.
+ */
+static ClassObject* lookupSignatureClass(const Method* meth, const char** pSig,
+    bool* pOkay)
+{
+    const char* sig = *pSig;
+    const char* endp = sig;
+
+    assert(sig != NULL && *sig == 'L');
+
+    /* scan forward to the terminating ';' */
+    while (*++endp != ';' && *endp != '\0')
+        ;
+    if (*endp != ';') {
+        LOG_VFY("VFY: bad signature component '%s' (missing ';')\n", sig);
+        *pOkay = false;
+        return NULL;
+    }
+
+    endp++; /* Advance past the ';'. */
+    int typeLen = endp - sig;
+    /* copy the descriptor into a NUL-terminated stack buffer (VLA) */
+    char typeStr[typeLen+1]; /* +1 for the '\0' */
+    memcpy(typeStr, sig, typeLen);
+    typeStr[typeLen] = '\0';
+
+    *pSig = endp - 1; /* - 1 so that *pSig points at, not past, the ';' */
+
+    return lookupClassByDescriptor(meth, typeStr, pOkay);
+}
+
+/*
+ * Look up an array class reference in a signature. Could be an arg or the
+ * return value.
+ *
+ * Advances "*pSig" to the last character in the signature (the ';' for an
+ * object array, the primitive type char otherwise).
+ *
+ * NOTE: this is also expected to verify the signature.
+ */
+static ClassObject* lookupSignatureArrayClass(const Method* meth,
+    const char** pSig, bool* pOkay)
+{
+    const char* sig = *pSig;
+    const char* endp = sig;
+
+    assert(sig != NULL && *sig == '[');
+
+    /* find the end: skip over all '[' dimension markers */
+    while (*++endp == '[' && *endp != '\0')
+        ;
+
+    if (*endp == 'L') {
+        /* object array: element descriptor runs through the ';' */
+        while (*++endp != ';' && *endp != '\0')
+            ;
+        if (*endp != ';') {
+            LOG_VFY("VFY: bad signature component '%s' (missing ';')\n", sig);
+            *pOkay = false;
+            return NULL;
+        }
+    }
+
+    /* +1: "endp" points AT the last descriptor char, not past it */
+    int typeLen = endp - sig +1;
+    char typeStr[typeLen+1];    /* VLA; +1 for the '\0' */
+    memcpy(typeStr, sig, typeLen);
+    typeStr[typeLen] = '\0';
+
+    *pSig = endp;
+
+    return lookupClassByDescriptor(meth, typeStr, pOkay);
+}
+
+/*
+ * Set the register types for the first instruction in the method based on
+ * the method signature. Arguments occupy the HIGHEST registers, so the
+ * first argument register is registersSize - insSize.
+ *
+ * This has the side-effect of validating the signature.
+ *
+ * Returns "true" on success.
+ */
+static bool setTypesFromSignature(const Method* meth, RegType* regTypes,
+    UninitInstanceMap* uninitMap)
+{
+    DexParameterIterator iterator;
+    int actualArgs, expectedArgs, argStart;
+    bool okay = true;
+
+    dexParameterIteratorInit(&iterator, &meth->prototype);
+    argStart = meth->registersSize - meth->insSize;
+    expectedArgs = meth->insSize; /* long/double count as two */
+    actualArgs = 0;
+
+    assert(argStart >= 0); /* should have been verified earlier */
+
+    /*
+     * Include the "this" pointer.
+     */
+    if (!dvmIsStaticMethod(meth)) {
+        /*
+         * If this is a constructor for a class other than java.lang.Object,
+         * mark the first ("this") argument as uninitialized. This restricts
+         * field access until the superclass constructor is called.
+         */
+        if (isInitMethod(meth) && meth->clazz != gDvm.classJavaLangObject) {
+            int uidx = dvmSetUninitInstance(uninitMap, kUninitThisArgAddr,
+                            meth->clazz);
+            assert(uidx == 0);      /* "this" always occupies slot 0 */
+            regTypes[argStart + actualArgs] = regTypeFromUninitIndex(uidx);
+        } else {
+            regTypes[argStart + actualArgs] = regTypeFromClass(meth->clazz);
+        }
+        actualArgs++;
+    }
+
+    /* walk the parameter list, assigning a RegType per descriptor */
+    for (;;) {
+        const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
+
+        if (descriptor == NULL) {
+            break;
+        }
+
+        if (actualArgs >= expectedArgs) {
+            LOG_VFY("VFY: expected %d args, found more (%s)\n",
+                expectedArgs, descriptor);
+            goto bad_sig;
+        }
+
+        switch (*descriptor) {
+        case 'L':
+        case '[':
+            /*
+             * We assume that reference arguments are initialized. The
+             * only way it could be otherwise (assuming the caller was
+             * verified) is if the current method is <init>, but in that
+             * case it's effectively considered initialized the instant
+             * we reach here (in the sense that we can return without
+             * doing anything or call virtual methods).
+             */
+            {
+                ClassObject* clazz =
+                    lookupClassByDescriptor(meth, descriptor, &okay);
+                if (!okay)
+                    goto bad_sig;
+                regTypes[argStart + actualArgs] = regTypeFromClass(clazz);
+            }
+            actualArgs++;
+            break;
+        case 'Z':
+            regTypes[argStart + actualArgs] = kRegTypeBoolean;
+            actualArgs++;
+            break;
+        case 'C':
+            regTypes[argStart + actualArgs] = kRegTypeChar;
+            actualArgs++;
+            break;
+        case 'B':
+            regTypes[argStart + actualArgs] = kRegTypeByte;
+            actualArgs++;
+            break;
+        case 'I':
+            regTypes[argStart + actualArgs] = kRegTypeInteger;
+            actualArgs++;
+            break;
+        case 'S':
+            regTypes[argStart + actualArgs] = kRegTypeShort;
+            actualArgs++;
+            break;
+        case 'F':
+            regTypes[argStart + actualArgs] = kRegTypeFloat;
+            actualArgs++;
+            break;
+        case 'D':
+            /*
+             * NOTE(review): wide args write two slots but only the first
+             * is bounds-checked above; a trailing wide arg with one slot
+             * left writes into the result pseudo-register before the
+             * count mismatch below rejects the method -- confirm this is
+             * benign.
+             */
+            regTypes[argStart + actualArgs] = kRegTypeDoubleLo;
+            regTypes[argStart + actualArgs +1] = kRegTypeDoubleHi;
+            actualArgs += 2;
+            break;
+        case 'J':
+            regTypes[argStart + actualArgs] = kRegTypeLongLo;
+            regTypes[argStart + actualArgs +1] = kRegTypeLongHi;
+            actualArgs += 2;
+            break;
+        default:
+            LOG_VFY("VFY: unexpected signature type char '%c'\n", *descriptor);
+            goto bad_sig;
+        }
+    }
+
+    if (actualArgs != expectedArgs) {
+        LOG_VFY("VFY: expected %d args, found %d\n", expectedArgs, actualArgs);
+        goto bad_sig;
+    }
+
+    const char* descriptor = dexProtoGetReturnType(&meth->prototype);
+
+    /*
+     * Validate return type. We don't do the type lookup; just want to make
+     * sure that it has the right format. Only major difference from the
+     * method argument format is that 'V' is supported.
+     */
+    switch (*descriptor) {
+    case 'I':
+    case 'C':
+    case 'S':
+    case 'B':
+    case 'Z':
+    case 'V':
+    case 'F':
+    case 'D':
+    case 'J':
+        /* primitive/void: must be a single character */
+        if (*(descriptor+1) != '\0')
+            goto bad_sig;
+        break;
+    case '[':
+        /* single/multi, object/primitive */
+        while (*++descriptor == '[')
+            ;
+        if (*descriptor == 'L') {
+            while (*++descriptor != ';' && *descriptor != '\0')
+                ;
+            if (*descriptor != ';')
+                goto bad_sig;
+        } else {
+            if (*(descriptor+1) != '\0')
+                goto bad_sig;
+        }
+        break;
+    case 'L':
+        /* could be more thorough here, but shouldn't be required */
+        while (*++descriptor != ';' && *descriptor != '\0')
+            ;
+        if (*descriptor != ';')
+            goto bad_sig;
+        break;
+    default:
+        goto bad_sig;
+    }
+
+    return true;
+
+//fail:
+//    LOG_VFY_METH(meth, "VFY: bad sig\n");
+//    return false;
+
+bad_sig:
+    {
+        char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
+        LOG_VFY("VFY: bad signature '%s' for %s.%s\n",
+            desc, meth->clazz->descriptor, meth->name);
+        free(desc);
+    }
+    return false;
+}
+
+/*
+ * Return the register type for the method. We can't just use the
+ * already-computed DalvikJniReturnType, because if it's a reference type
+ * we need to do the class lookup.
+ *
+ * Returned references are assumed to be initialized.
+ *
+ * Returns kRegTypeUnknown for "void".
+ */
+static RegType getMethodReturnType(const Method* meth)
+{
+    RegType type;
+    const char* descriptor = dexProtoGetReturnType(&meth->prototype);
+
+    switch (*descriptor) {
+    case 'I':
+        type = kRegTypeInteger;
+        break;
+    case 'C':
+        type = kRegTypeChar;
+        break;
+    case 'S':
+        type = kRegTypeShort;
+        break;
+    case 'B':
+        type = kRegTypeByte;
+        break;
+    case 'Z':
+        type = kRegTypeBoolean;
+        break;
+    case 'V':
+        /* void: nothing to track in the result register */
+        type = kRegTypeUnknown;
+        break;
+    case 'F':
+        type = kRegTypeFloat;
+        break;
+    case 'D':
+        type = kRegTypeDoubleLo;
+        break;
+    case 'J':
+        type = kRegTypeLongLo;
+        break;
+    case 'L':
+    case '[':
+        {
+            /* signature was validated earlier; lookup is expected to work */
+            bool okay = true;
+            ClassObject* clazz =
+                lookupClassByDescriptor(meth, descriptor, &okay);
+            assert(okay);
+            type = regTypeFromClass(clazz);
+        }
+        break;
+    default:
+        /* we verified signature return type earlier, so this is impossible */
+        assert(false);
+        type = kRegTypeConflict;
+        break;
+    }
+
+    return type;
+}
+
+/*
+ * Map a single-character signature value (i.e. a primitive type) to the
+ * corresponding RegType. This is intended for access to object fields
+ * holding primitive types.
+ *
+ * Objects ('L'), arrays ('['), and void ('V') yield kRegTypeUnknown.
+ */
+static RegType primSigCharToRegType(char sigChar)
+{
+    switch (sigChar) {
+    case 'I':   return kRegTypeInteger;
+    case 'C':   return kRegTypeChar;
+    case 'S':   return kRegTypeShort;
+    case 'B':   return kRegTypeByte;
+    case 'Z':   return kRegTypeBoolean;
+    case 'F':   return kRegTypeFloat;
+    case 'D':   return kRegTypeDoubleLo;
+    case 'J':   return kRegTypeLongLo;
+    case 'V':
+    case 'L':
+    case '[':
+        return kRegTypeUnknown;
+    default:
+        assert(false);      /* not a valid signature character */
+        return kRegTypeUnknown;
+    }
+}
+
+/*
+ * Verify the arguments to a method. We're executing in "method", making
+ * a call to the method reference in vB.
+ *
+ * If this is a "direct" invoke, we allow calls to <init>. For calls to
+ * <init>, the first argument may be an uninitialized reference. Otherwise,
+ * calls to anything starting with '<' will be rejected, as will any
+ * uninitialized reference arguments.
+ *
+ * For non-static method calls, this will verify that the method call is
+ * appropriate for the "this" argument.
+ *
+ * The method reference is in vBBBB. The "isRange" parameter determines
+ * whether we use 0-4 "args" values or a range of registers defined by
+ * vAA and vCCCC.
+ *
+ * Widening conversions on integers and references are allowed, but
+ * narrowing conversions are not.
+ *
+ * Returns the resolved method on success, NULL (and sets "*pOkay" to "false")
+ * on failure.
+ */
+static Method* verifyInvocationArgs(const Method* meth, const RegType* insnRegs,
+ const int insnRegCount, const DecodedInstruction* pDecInsn,
+ UninitInstanceMap* uninitMap, MethodType methodType, bool isRange,
+ bool isSuper, bool* pOkay)
+{
+ Method* resMethod;
+ char* sigOriginal = NULL;
+
+ /*
+ * Resolve the method. This could be an abstract or concrete method
+ * depending on what sort of call we're making.
+ */
+ if (methodType == METHOD_INTERFACE) {
+ resMethod = dvmOptResolveInterfaceMethod(meth->clazz, pDecInsn->vB);
+ } else {
+ resMethod = dvmOptResolveMethod(meth->clazz, pDecInsn->vB, methodType);
+ }
+ if (resMethod == NULL) {
+ /* failed; print a meaningful failure message */
+ DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+ const DexMethodId* pMethodId;
+ const char* methodName;
+ char* methodDesc;
+ const char* classDescriptor;
+
+ pMethodId = dexGetMethodId(pDexFile, pDecInsn->vB);
+ methodName = dexStringById(pDexFile, pMethodId->nameIdx);
+ methodDesc = dexCopyDescriptorFromMethodId(pDexFile, pMethodId);
+ classDescriptor = dexStringByTypeIdx(pDexFile, pMethodId->classIdx);
+
+ if (!gDvm.optimizing) {
+ char* dotMissingClass = dvmDescriptorToDot(classDescriptor);
+ char* dotMethClass = dvmDescriptorToDot(meth->clazz->descriptor);
+ //char* curMethodDesc =
+ // dexProtoCopyMethodDescriptor(&meth->prototype);
+
+ LOGE("Could not find method %s.%s, referenced from "
+ "method %s.%s\n",
+ dotMissingClass, methodName/*, methodDesc*/,
+ dotMethClass, meth->name/*, curMethodDesc*/);
+
+ free(dotMissingClass);
+ free(dotMethClass);
+ //free(curMethodDesc);
+ }
+
+ LOG_VFY("VFY: unable to resolve %s method %u: %s.%s %s\n",
+ dvmMethodTypeStr(methodType), pDecInsn->vB,
+ classDescriptor, methodName, methodDesc);
+ free(methodDesc);
+ goto fail;
+ }
+
+ /*
+ * Only time you can explicitly call a method starting with '<' is when
+ * making a "direct" invocation on "<init>". There are additional
+ * restrictions but we don't enforce them here.
+ */
+ if (resMethod->name[0] == '<') {
+ if (methodType != METHOD_DIRECT || !isInitMethod(resMethod)) {
+ LOG_VFY("VFY: invalid call to %s.%s\n",
+ resMethod->clazz->descriptor, resMethod->name);
+ goto bad_sig;
+ }
+ }
+
+ /*
+ * If we're using invoke-super(method), make sure that the executing
+ * method's class' superclass has a vtable entry for the target method.
+ */
+ if (isSuper) {
+ assert(methodType == METHOD_VIRTUAL);
+ ClassObject* super = meth->clazz->super;
+ if (super == NULL || resMethod->methodIndex > super->vtableCount) {
+ char* desc = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+ LOG_VFY("VFY: invalid invoke-super from %s.%s to super %s.%s %s\n",
+ meth->clazz->descriptor, meth->name,
+ (super == NULL) ? "-" : super->descriptor,
+ resMethod->name, desc);
+ free(desc);
+ goto fail;
+ }
+ }
+
+ /*
+ * We use vAA as our expected arg count, rather than resMethod->insSize,
+ * because we need to match the call to the signature. Also, we might
+ * might be calling through an abstract method definition (which doesn't
+ * have register count values).
+ */
+ sigOriginal = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+ const char* sig = sigOriginal;
+ int expectedArgs = pDecInsn->vA;
+ int actualArgs = 0;
+
+ if (!isRange && expectedArgs > 5) {
+ LOG_VFY("VFY: invalid arg count in non-range invoke (%d)\n",
+ pDecInsn->vA);
+ goto fail;
+ }
+ if (expectedArgs > meth->outsSize) {
+ LOG_VFY("VFY: invalid arg count (%d) exceeds outsSize (%d)\n",
+ expectedArgs, meth->outsSize);
+ goto fail;
+ }
+
+ if (*sig++ != '(')
+ goto bad_sig;
+
+ /*
+ * Check the "this" argument, which must be an instance of the class
+ * that declared the method. For an interface class, we don't do the
+ * full interface merge, so we can't do a rigorous check here (which
+ * is okay since we have to do it at runtime).
+ */
+ if (!dvmIsStaticMethod(resMethod)) {
+ ClassObject* actualThisRef;
+ RegType actualArgType;
+
+ actualArgType = getInvocationThis(insnRegs, insnRegCount, pDecInsn,
+ pOkay);
+ if (!*pOkay)
+ goto fail;
+
+ if (regTypeIsUninitReference(actualArgType) && resMethod->name[0] != '<')
+ {
+ LOG_VFY("VFY: 'this' arg must be initialized\n");
+ goto fail;
+ }
+ if (methodType != METHOD_INTERFACE && actualArgType != kRegTypeZero) {
+ actualThisRef = regTypeReferenceToClass(actualArgType, uninitMap);
+ if (!dvmInstanceof(actualThisRef, resMethod->clazz)) {
+ LOG_VFY("VFY: 'this' arg '%s' not instance of '%s'\n",
+ actualThisRef->descriptor,
+ resMethod->clazz->descriptor);
+ goto fail;
+ }
+ }
+ actualArgs++;
+ }
+
+ /*
+ * Process the target method's signature. This signature may or may not
+ * have been verified, so we can't assume it's properly formed.
+ */
+ while (*sig != '\0' && *sig != ')') {
+ if (actualArgs >= expectedArgs) {
+ LOG_VFY("VFY: expected %d args, found more (%c)\n",
+ expectedArgs, *sig);
+ goto bad_sig;
+ }
+
+ u4 getReg;
+ if (isRange)
+ getReg = pDecInsn->vC + actualArgs;
+ else
+ getReg = pDecInsn->arg[actualArgs];
+
+ switch (*sig) {
+ case 'L':
+ {
+ ClassObject* clazz = lookupSignatureClass(meth, &sig, pOkay);
+ if (!*pOkay)
+ goto bad_sig;
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ regTypeFromClass(clazz), pOkay);
+ if (!*pOkay) {
+ LOG_VFY("VFY: bad arg %d (into %s)\n",
+ actualArgs, clazz->descriptor);
+ goto bad_sig;
+ }
+ }
+ actualArgs++;
+ break;
+ case '[':
+ {
+ ClassObject* clazz =
+ lookupSignatureArrayClass(meth, &sig, pOkay);
+ if (!*pOkay)
+ goto bad_sig;
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ regTypeFromClass(clazz), pOkay);
+ if (!*pOkay) {
+ LOG_VFY("VFY: bad arg %d (into %s)\n",
+ actualArgs, clazz->descriptor);
+ goto bad_sig;
+ }
+ }
+ actualArgs++;
+ break;
+ case 'Z':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeBoolean, pOkay);
+ actualArgs++;
+ break;
+ case 'C':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeChar, pOkay);
+ actualArgs++;
+ break;
+ case 'B':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeByte, pOkay);
+ actualArgs++;
+ break;
+ case 'I':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeInteger, pOkay);
+ actualArgs++;
+ break;
+ case 'S':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeShort, pOkay);
+ actualArgs++;
+ break;
+ case 'F':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeFloat, pOkay);
+ actualArgs++;
+ break;
+ case 'D':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeDoubleLo, pOkay);
+ actualArgs += 2;
+ break;
+ case 'J':
+ verifyRegisterType(insnRegs, insnRegCount, getReg,
+ kRegTypeLongLo, pOkay);
+ actualArgs += 2;
+ break;
+ default:
+ LOG_VFY("VFY: invocation target: bad signature type char '%c'\n",
+ *sig);
+ goto bad_sig;
+ }
+
+ sig++;
+ }
+ if (*sig != ')') {
+ char* desc = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+ LOG_VFY("VFY: invocation target: bad signature '%s'\n", desc);
+ free(desc);
+ goto bad_sig;
+ }
+
+ if (actualArgs != expectedArgs) {
+ LOG_VFY("VFY: expected %d args, found %d\n", expectedArgs, actualArgs);
+ goto bad_sig;
+ }
+
+ free(sigOriginal);
+ return resMethod;
+
+bad_sig:
+ if (resMethod != NULL) {
+ char* desc = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+ LOG_VFY("VFY: rejecting call to %s.%s %s\n",
+ resMethod->clazz->descriptor, resMethod->name, desc);
+ free(desc);
+ }
+
+fail:
+ free(sigOriginal);
+ *pOkay = false;
+ return NULL;
+}
+
+/*
+ * Get the class object for the type of data stored in a field. This isn't
+ * stored in the Field struct, so we have to recover it from the signature.
+ *
+ * This only works for reference types. Don't call this for primitive types.
+ *
+ * If we can't find the class, we return java.lang.Object, so that
+ * verification can continue if a field is only accessed in trivial ways.
+ */
+static ClassObject* getFieldClass(const Method* meth, const Field* field)
+{
+ ClassObject* fieldClass;
+ const char* signature = field->signature;
+
+ if ((*signature == 'L') || (*signature == '[')) {
+ fieldClass = dvmFindClassNoInit(signature,
+ meth->clazz->classLoader);
+ } else {
+ return NULL;
+ }
+
+ if (fieldClass == NULL) {
+ dvmClearOptException(dvmThreadSelf());
+ LOGV("VFY: unable to find class '%s' for field %s.%s, trying Object\n",
+ field->signature, meth->clazz->descriptor, field->name);
+ fieldClass = gDvm.classJavaLangObject;
+ } else {
+ assert(!dvmIsPrimitiveClass(fieldClass));
+ }
+ return fieldClass;
+}
+
+
+/*
+ * ===========================================================================
+ * Register operations
+ * ===========================================================================
+ */
+
+/*
+ * Get the type of register N, verifying that the register is valid.
+ *
+ * Sets "*pOkay" to false if the register number is out of range.
+ */
+static inline RegType getRegisterType(const RegType* insnRegs,
+ const int insnRegCount, u4 vsrc, bool* pOkay)
+{
+ RegType type;
+
+ if (vsrc >= (u4) insnRegCount) {
+ *pOkay = false;
+ return kRegTypeUnknown;
+ } else {
+ return insnRegs[vsrc];
+ }
+}
+
+/*
+ * Get the value from a register, and cast it to a ClassObject. Sets
+ * "pOkay" to false if something fails.
+ *
+ * This fails if the register holds an uninitialized class.
+ *
+ * If the register holds kRegTypeZero, this returns a NULL pointer.
+ */
+static ClassObject* getClassFromRegister(const RegType* insnRegs,
+ const int insnRegCount, u4 vsrc, bool* pOkay)
+{
+ ClassObject* clazz = NULL;
+ RegType type;
+
+ /* get the element type of the array held in vsrc */
+ type = getRegisterType(insnRegs, insnRegCount, vsrc, pOkay);
+ if (!*pOkay)
+ goto bail;
+
+ /* if "always zero", we allow it to fail at runtime */
+ if (type == kRegTypeZero)
+ goto bail;
+
+ if (!regTypeIsReference(type)) {
+ LOG_VFY("VFY: tried to get class from non-ref register v%d (type=%d)\n",
+ vsrc, type);
+ *pOkay = false;
+ goto bail;
+ }
+ if (regTypeIsUninitReference(type)) {
+ LOG_VFY("VFY: register %u holds uninitialized reference\n", vsrc);
+ *pOkay = false;
+ goto bail;
+ }
+
+ clazz = regTypeInitializedReferenceToClass(type);
+
+bail:
+ return clazz;
+}
+
+/*
+ * Get the "this" pointer from a non-static method invocation. This
+ * returns the RegType so the caller can decide whether it needs the
+ * reference to be initialized or not. (Can also return kRegTypeZero
+ * if the reference can only be zero at this point.)
+ *
+ * The argument count is in vA, and the first argument is in vC, for both
+ * "simple" and "range" versions. We just need to make sure vA is >= 1
+ * and then return vC.
+ */
static RegType getInvocationThis(const RegType* insnRegs,
    const int insnRegCount, const DecodedInstruction* pDecInsn, bool* pOkay)
{
    RegType thisType = kRegTypeUnknown;

    /* a non-static invoke must pass at least the "this" argument */
    if (pDecInsn->vA < 1) {
        LOG_VFY("VFY: invoke lacks 'this'\n");
        *pOkay = false;
        goto bail;
    }

    /* get the type of the "this" argument, which is always in vC */
    thisType = getRegisterType(insnRegs, insnRegCount, pDecInsn->vC, pOkay);
    if (!*pOkay) {
        LOG_VFY("VFY: failed to get this from register %u\n", pDecInsn->vC);
        goto bail;
    }

    /* "this" must be a reference (may be uninit; caller decides if allowed) */
    if (!regTypeIsReference(thisType)) {
        LOG_VFY("VFY: tried to get class from non-ref register v%d (type=%d)\n",
            pDecInsn->vC, thisType);
        *pOkay = false;
        goto bail;
    }

bail:
    return thisType;
}
+
+/*
+ * Set the type of register N, verifying that the register is valid. If
+ * "newType" is the "Lo" part of a 64-bit value, register N+1 will be
+ * set to "newType+1".
+ *
+ * Sets "*pOkay" to false if the register number is out of range.
+ */
static void setRegisterType(RegType* insnRegs, const int insnRegCount,
    u4 vdst, RegType newType, bool* pOkay)
{
    //LOGD("set-reg v%u = %d\n", vdst, newType);
    switch (newType) {
    /* category-1 values occupy a single register */
    case kRegTypeUnknown:
    case kRegTypeBoolean:
    case kRegTypeOne:
    case kRegTypeByte:
    case kRegTypePosByte:
    case kRegTypeShort:
    case kRegTypePosShort:
    case kRegTypeChar:
    case kRegTypeInteger:
    case kRegTypeFloat:
    case kRegTypeZero:
        if (vdst >= (u4) insnRegCount) {
            *pOkay = false;
        } else {
            insnRegs[vdst] = newType;
        }
        break;
    /* category-2 values occupy vdst and vdst+1; the "Hi" half is implied */
    case kRegTypeLongLo:
    case kRegTypeDoubleLo:
        if (vdst+1 >= (u4) insnRegCount) {
            *pOkay = false;
        } else {
            insnRegs[vdst] = newType;
            insnRegs[vdst+1] = newType+1;
        }
        break;
    case kRegTypeLongHi:
    case kRegTypeDoubleHi:
        /* should never set these explicitly */
        *pOkay = false;
        break;

    case kRegTypeUninit:
    default:
        /* everything else must be a (possibly uninitialized) reference */
        if (regTypeIsReference(newType)) {
            if (vdst >= (u4) insnRegCount) {
                *pOkay = false;
                break;
            }
            insnRegs[vdst] = newType;

            /*
             * In most circumstances we won't see a reference to a primitive
             * class here (e.g. "D"), since that would mean the object in the
             * register is actually a primitive type. It can happen as the
             * result of an assumed-successful check-cast instruction in
             * which the second argument refers to a primitive class. (In
             * practice, such an instruction will always throw an exception.)
             *
             * This is not an issue for instructions like const-class, where
             * the object in the register is a java.lang.Class instance.
             */
            break;
        }
        /* bad - fall through */

    case kRegTypeConflict:      // should only be set during a merge
        LOG_VFY("Unexpected set type %d\n", newType);
        assert(false);
        *pOkay = false;
        break;
    }
}
+
+/*
+ * Verify that the contents of the specified register have the specified
+ * type (or can be converted to it through an implicit widening conversion).
+ *
+ * In theory we could use this to modify the type of the source register,
+ * e.g. a generic 32-bit constant, once used as a float, would thereafter
+ * remain a float. There is no compelling reason to require this though.
+ *
+ * If "vsrc" is a reference, both it and the "vsrc" register must be
+ * initialized ("vsrc" may be Zero). This will verify that the value in
+ * the register is an instance of checkType, or if checkType is an
+ * interface, verify that the register implements checkType.
+ */
static void verifyRegisterType(const RegType* insnRegs, const int insnRegCount,
    u4 vsrc, RegType checkType, bool* pOkay)
{
    if (vsrc >= (u4) insnRegCount) {
        *pOkay = false;
        return;
    }

    RegType srcType = insnRegs[vsrc];

    //LOGD("check-reg v%u = %d\n", vsrc, checkType);
    switch (checkType) {
    /* category-1 numeric types: allow implicit widening conversions */
    case kRegTypeFloat:
    case kRegTypeBoolean:
    case kRegTypePosByte:
    case kRegTypeByte:
    case kRegTypePosShort:
    case kRegTypeShort:
    case kRegTypeChar:
    case kRegTypeInteger:
        if (!canConvertTo1nr(srcType, checkType)) {
            LOG_VFY("VFY: register1 v%u type %d, wanted %d\n",
                vsrc, srcType, checkType);
            *pOkay = false;
        }
        break;
    /* category-2: check both halves of the register pair */
    case kRegTypeLongLo:
    case kRegTypeDoubleLo:
        if (vsrc+1 >= (u4) insnRegCount) {
            LOG_VFY("VFY: register2 v%u out of range (%d)\n",
                vsrc, insnRegCount);
            *pOkay = false;
        } else if (insnRegs[vsrc+1] != srcType+1) {
            /* the "Hi" half must match the "Lo" half currently in vsrc */
            LOG_VFY("VFY: register2 v%u-%u values %d,%d\n",
                vsrc, vsrc+1, insnRegs[vsrc], insnRegs[vsrc+1]);
            *pOkay = false;
        } else if (!canConvertTo2(srcType, checkType)) {
            LOG_VFY("VFY: register2 v%u type %d, wanted %d\n",
                vsrc, srcType, checkType);
            *pOkay = false;
        }
        break;

    case kRegTypeLongHi:
    case kRegTypeDoubleHi:
    case kRegTypeZero:
    case kRegTypeOne:
    case kRegTypeUnknown:
    case kRegTypeConflict:
        /* should never be checking for these explicitly */
        assert(false);
        *pOkay = false;
        return;
    case kRegTypeUninit:
    default:
        /* make sure checkType is initialized reference */
        if (!regTypeIsReference(checkType)) {
            LOG_VFY("VFY: unexpected check type %d\n", checkType);
            assert(false);
            *pOkay = false;
            break;
        }
        if (regTypeIsUninitReference(checkType)) {
            LOG_VFY("VFY: uninitialized ref not expected as reg check\n");
            *pOkay = false;
            break;
        }
        /* make sure srcType is initialized reference or always-NULL */
        if (!regTypeIsReference(srcType)) {
            LOG_VFY("VFY: register1 v%u type %d, wanted ref\n", vsrc, srcType);
            *pOkay = false;
            break;
        }
        if (regTypeIsUninitReference(srcType)) {
            LOG_VFY("VFY: register1 v%u holds uninitialized ref\n", vsrc);
            *pOkay = false;
            break;
        }
        /* if the register isn't Zero, make sure it's an instance of check */
        if (srcType != kRegTypeZero) {
            ClassObject* srcClass = regTypeInitializedReferenceToClass(srcType);
            ClassObject* checkClass = regTypeInitializedReferenceToClass(checkType);
            assert(srcClass != NULL);
            assert(checkClass != NULL);

            if (dvmIsInterfaceClass(checkClass)) {
                /*
                 * All objects implement all interfaces as far as the
                 * verifier is concerned. The runtime has to sort it out.
                 * See comments above findCommonSuperclass.
                 */
                /*
                if (srcClass != checkClass &&
                    !dvmImplements(srcClass, checkClass))
                {
                    LOG_VFY("VFY: %s does not implement %s\n",
                        srcClass->descriptor, checkClass->descriptor);
                    *pOkay = false;
                }
                */
            } else {
                if (!dvmInstanceof(srcClass, checkClass)) {
                    LOG_VFY("VFY: %s is not instance of %s\n",
                        srcClass->descriptor, checkClass->descriptor);
                    *pOkay = false;
                }
            }
        }
        break;
    }
}
+
+/*
+ * Set the type of the "result" register. Mostly this exists to expand
+ * "insnRegCount" to encompass the result register.
+ */
+static void setResultRegisterType(RegType* insnRegs, const int insnRegCount,
+ RegType newType, bool* pOkay)
+{
+ setRegisterType(insnRegs, insnRegCount + kExtraRegs,
+ RESULT_REGISTER(insnRegCount), newType, pOkay);
+}
+
+
+/*
+ * Update all registers holding "uninitType" to instead hold the
+ * corresponding initialized reference type. This is called when an
+ * appropriate <init> method is invoked -- all copies of the reference
+ * must be marked as initialized.
+ */
+static void markRefsAsInitialized(RegType* insnRegs, int insnRegCount,
+ UninitInstanceMap* uninitMap, RegType uninitType, bool* pOkay)
+{
+ ClassObject* clazz;
+ RegType initType;
+ int i, changed;
+
+ clazz = dvmGetUninitInstance(uninitMap, regTypeToUninitIndex(uninitType));
+ if (clazz == NULL) {
+ LOGE("VFY: unable to find type=0x%x (idx=%d)\n",
+ uninitType, regTypeToUninitIndex(uninitType));
+ *pOkay = false;
+ return;
+ }
+ initType = regTypeFromClass(clazz);
+
+ changed = 0;
+ for (i = 0; i < insnRegCount; i++) {
+ if (insnRegs[i] == uninitType) {
+ insnRegs[i] = initType;
+ changed++;
+ }
+ }
+ //LOGD("VFY: marked %d registers as initialized\n", changed);
+ assert(changed > 0);
+
+ return;
+}
+
+/*
+ * We're creating a new instance of class C at address A. Any registers
+ * holding instances previously created at address A must be initialized
+ * by now. If not, we mark them as "conflict" to prevent them from being
+ * used (otherwise, markRefsAsInitialized would mark the old ones and the
+ * new ones at the same time).
+ */
+static void markUninitRefsAsInvalid(RegType* insnRegs, int insnRegCount,
+ UninitInstanceMap* uninitMap, RegType uninitType)
+{
+ int i, changed;
+
+ changed = 0;
+ for (i = 0; i < insnRegCount; i++) {
+ if (insnRegs[i] == uninitType) {
+ insnRegs[i] = kRegTypeConflict;
+ changed++;
+ }
+ }
+
+ //if (changed)
+ // LOGD("VFY: marked %d uninitialized registers as invalid\n", changed);
+}
+
+/*
+ * Find the start of the register set for the specified instruction in
+ * the current method.
+ */
static inline RegType* getRegisterLine(const RegisterTable* regTable,
    int insnIdx)
{
    /* the table stores one pre-computed register-line pointer per address */
    return regTable->addrRegs[insnIdx];
}
+
+/*
+ * Copy a bunch of registers.
+ */
+static inline void copyRegisters(RegType* dst, const RegType* src,
+ int numRegs)
+{
+ memcpy(dst, src, numRegs * sizeof(RegType));
+}
+
+/*
+ * Compare a bunch of registers.
+ *
+ * Returns 0 if they match. Using this for a sort is unwise, since the
+ * value can change based on machine endianness.
+ */
+static inline int compareRegisters(const RegType* src1, const RegType* src2,
+ int numRegs)
+{
+ return memcmp(src1, src2, numRegs * sizeof(RegType));
+}
+
+/*
+ * Register type categories, for type checking.
+ *
+ * The spec says category 1 includes boolean, byte, char, short, int, float,
+ * reference, and returnAddress. Category 2 includes long and double.
+ *
+ * We treat object references separately, so we have "category1nr". We
+ * don't support jsr/ret, so there is no "returnAddress" type.
+ */
typedef enum TypeCategory {
    kTypeCategoryUnknown = 0,
    kTypeCategory1nr,           // boolean, byte, char, short, int, float
    kTypeCategory2,             // long, double
    kTypeCategoryRef,           // object reference
} TypeCategory;
+
+/*
+ * See if "type" matches "cat". All we're really looking for here is that
+ * we're not mixing and matching 32-bit and 64-bit quantities, and we're
+ * not mixing references with numerics. (For example, the arguments to
+ * "a < b" could be integers of different sizes, but they must both be
+ * integers. Dalvik is less specific about int vs. float, so we treat them
+ * as equivalent here.)
+ *
+ * For category 2 values, "type" must be the "low" half of the value.
+ *
+ * Sets "*pOkay" to false if not.
+ */
+static void checkTypeCategory(RegType type, TypeCategory cat, bool* pOkay)
+{
+ switch (cat) {
+ case kTypeCategory1nr:
+ switch (type) {
+ case kRegTypeFloat:
+ case kRegTypeZero:
+ case kRegTypeOne:
+ case kRegTypeBoolean:
+ case kRegTypePosByte:
+ case kRegTypeByte:
+ case kRegTypePosShort:
+ case kRegTypeShort:
+ case kRegTypeChar:
+ case kRegTypeInteger:
+ break;
+ default:
+ *pOkay = false;
+ break;
+ }
+ break;
+
+ case kTypeCategory2:
+ switch (type) {
+ case kRegTypeLongLo:
+ case kRegTypeDoubleLo:
+ break;
+ default:
+ *pOkay = false;
+ break;
+ }
+ break;
+
+ case kTypeCategoryRef:
+ if (type != kRegTypeZero && !regTypeIsReference(type))
+ *pOkay = false;
+ break;
+
+ default:
+ assert(false);
+ *pOkay = false;
+ break;
+ }
+}
+
+/*
+ * For a category 2 register pair, verify that "typeh" is the appropriate
+ * high part for "typel".
+ *
+ * Does not verify that "typel" is in fact the low part of a 64-bit
+ * register pair.
+ */
+static void checkWidePair(RegType typel, RegType typeh, bool* pOkay)
+{
+ if ((typeh != typel+1))
+ *pOkay = false;
+}
+
+/*
+ * Implement category-1 "move" instructions. Copy a 32-bit value from
+ * "vsrc" to "vdst".
+ *
+ * "insnRegCount" is the number of registers available. The "vdst" and
+ * "vsrc" values are checked against this.
+ */
+static void copyRegister1(RegType* insnRegs, int insnRegCount, u4 vdst,
+ u4 vsrc, TypeCategory cat, bool* pOkay)
+{
+ RegType type = getRegisterType(insnRegs, insnRegCount, vsrc, pOkay);
+ if (*pOkay)
+ checkTypeCategory(type, cat, pOkay);
+ if (*pOkay)
+ setRegisterType(insnRegs, insnRegCount, vdst, type, pOkay);
+
+ if (!*pOkay) {
+ LOG_VFY("VFY: copy1 v%u<-v%u type=%d cat=%d\n", vdst, vsrc, type, cat);
+ }
+}
+
+/*
+ * Implement category-2 "move" instructions. Copy a 64-bit value from
+ * "vsrc" to "vdst". This copies both halves of the register.
+ */
+static void copyRegister2(RegType* insnRegs, int insnRegCount, u4 vdst,
+ u4 vsrc, bool* pOkay)
+{
+ RegType typel = getRegisterType(insnRegs, insnRegCount, vsrc, pOkay);
+ RegType typeh = getRegisterType(insnRegs, insnRegCount, vsrc+1, pOkay);
+ if (*pOkay) {
+ checkTypeCategory(typel, kTypeCategory2, pOkay);
+ checkWidePair(typel, typeh, pOkay);
+ }
+ if (*pOkay)
+ setRegisterType(insnRegs, insnRegCount, vdst, typel, pOkay);
+
+ if (!*pOkay) {
+ LOG_VFY("VFY: copy2 v%u<-v%u type=%d/%d\n", vdst, vsrc, typel, typeh);
+ }
+}
+
+/*
+ * Implement "move-result". Copy the category-1 value from the result
+ * register to another register, and reset the result register.
+ *
+ * We can't just call copyRegister1 with an altered insnRegCount,
+ * because that would affect the test on "vdst" as well.
+ */
+static void copyResultRegister1(RegType* insnRegs, const int insnRegCount,
+ u4 vdst, TypeCategory cat, bool* pOkay)
+{
+ RegType type;
+ u4 vsrc;
+
+ vsrc = RESULT_REGISTER(insnRegCount);
+ type = getRegisterType(insnRegs, insnRegCount + kExtraRegs, vsrc, pOkay);
+ if (*pOkay)
+ checkTypeCategory(type, cat, pOkay);
+ if (*pOkay) {
+ setRegisterType(insnRegs, insnRegCount, vdst, type, pOkay);
+ insnRegs[vsrc] = kRegTypeUnknown;
+ }
+
+ if (!*pOkay) {
+ LOG_VFY("VFY: copyRes1 v%u<-v%u cat=%d type=%d\n",
+ vdst, vsrc, cat, type);
+ }
+}
+
+/*
+ * Implement "move-result-wide". Copy the category-2 value from the result
+ * register to another register, and reset the result register.
+ *
+ * We can't just call copyRegister2 with an altered insnRegCount,
+ * because that would affect the test on "vdst" as well.
+ */
+static void copyResultRegister2(RegType* insnRegs, const int insnRegCount,
+ u4 vdst, bool* pOkay)
+{
+ RegType typel, typeh;
+ u4 vsrc;
+
+ vsrc = RESULT_REGISTER(insnRegCount);
+ typel = getRegisterType(insnRegs, insnRegCount + kExtraRegs, vsrc, pOkay);
+ typeh = getRegisterType(insnRegs, insnRegCount + kExtraRegs, vsrc+1, pOkay);
+ if (*pOkay) {
+ checkTypeCategory(typel, kTypeCategory2, pOkay);
+ checkWidePair(typel, typeh, pOkay);
+ }
+ if (*pOkay) {
+ setRegisterType(insnRegs, insnRegCount, vdst, typel, pOkay);
+ insnRegs[vsrc] = kRegTypeUnknown;
+ insnRegs[vsrc+1] = kRegTypeUnknown;
+ }
+
+ if (!*pOkay) {
+ LOG_VFY("VFY: copyRes2 v%u<-v%u type=%d/%d\n",
+ vdst, vsrc, typel, typeh);
+ }
+}
+
+/*
+ * Verify types for a simple two-register instruction (e.g. "neg-int").
+ * "dstType" is stored into vA, and "srcType" is verified against vB.
+ */
static void checkUnop(RegType* insnRegs, const int insnRegCount,
    DecodedInstruction* pDecInsn, RegType dstType, RegType srcType,
    bool* pOkay)
{
    /* validate the source, then unconditionally record the result type */
    verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vB, srcType, pOkay);
    setRegisterType(insnRegs, insnRegCount, pDecInsn->vA, dstType, pOkay);
}
+
+/*
+ * We're performing an operation like "and-int/2addr" that can be
+ * performed on booleans as well as integers. We get no indication of
+ * boolean-ness, but we can infer it from the types of the arguments.
+ *
+ * Assumes we've already validated reg1/reg2.
+ *
+ * Returns true if both args are Boolean, Zero, or One.
+ */
+static bool upcastBooleanOp(RegType* insnRegs, const int insnRegCount,
+ u4 reg1, u4 reg2)
+{
+ RegType type1, type2;
+
+ type1 = insnRegs[reg1];
+ type2 = insnRegs[reg2];
+
+ if ((type1 == kRegTypeBoolean || type1 == kRegTypeZero ||
+ type1 == kRegTypeOne) &&
+ (type2 == kRegTypeBoolean || type2 == kRegTypeZero ||
+ type2 == kRegTypeOne))
+ {
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Verify types for A two-register instruction with a literal constant
+ * (e.g. "add-int/lit8"). "dstType" is stored into vA, and "srcType" is
+ * verified against vB.
+ *
+ * If "checkBooleanOp" is set, we use the constant value in vC.
+ */
+static void checkLitop(RegType* insnRegs, const int insnRegCount,
+ DecodedInstruction* pDecInsn, RegType dstType, RegType srcType,
+ bool checkBooleanOp, bool* pOkay)
+{
+ verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vB, srcType, pOkay);
+ if (*pOkay && checkBooleanOp) {
+ assert(dstType == kRegTypeInteger);
+ /* check vB with the call, then check the constant manually */
+ if (upcastBooleanOp(insnRegs, insnRegCount, pDecInsn->vB, pDecInsn->vB)
+ && (pDecInsn->vC == 0 || pDecInsn->vC == 1))
+ {
+ dstType = kRegTypeBoolean;
+ }
+ }
+ setRegisterType(insnRegs, insnRegCount, pDecInsn->vA, dstType, pOkay);
+}
+
+/*
+ * Verify types for a simple three-register instruction (e.g. "add-int").
+ * "dstType" is stored into vA, and "srcType1"/"srcType2" are verified
+ * against vB/vC.
+ */
+static void checkBinop(RegType* insnRegs, const int insnRegCount,
+ DecodedInstruction* pDecInsn, RegType dstType, RegType srcType1,
+ RegType srcType2, bool checkBooleanOp, bool* pOkay)
+{
+ verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vB, srcType1, pOkay);
+ verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vC, srcType2, pOkay);
+ if (*pOkay && checkBooleanOp) {
+ assert(dstType == kRegTypeInteger);
+ if (upcastBooleanOp(insnRegs, insnRegCount, pDecInsn->vB, pDecInsn->vC))
+ dstType = kRegTypeBoolean;
+ }
+ setRegisterType(insnRegs, insnRegCount, pDecInsn->vA, dstType, pOkay);
+}
+
+/*
+ * Verify types for a binary "2addr" operation. "srcType1"/"srcType2"
+ * are verified against vA/vB, then "dstType" is stored into vA.
+ */
+static void checkBinop2addr(RegType* insnRegs, const int insnRegCount,
+ DecodedInstruction* pDecInsn, RegType dstType, RegType srcType1,
+ RegType srcType2, bool checkBooleanOp, bool* pOkay)
+{
+ verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vA, srcType1, pOkay);
+ verifyRegisterType(insnRegs, insnRegCount, pDecInsn->vB, srcType2, pOkay);
+ if (*pOkay && checkBooleanOp) {
+ assert(dstType == kRegTypeInteger);
+ if (upcastBooleanOp(insnRegs, insnRegCount, pDecInsn->vA, pDecInsn->vB))
+ dstType = kRegTypeBoolean;
+ }
+ setRegisterType(insnRegs, insnRegCount, pDecInsn->vA, dstType, pOkay);
+}
+
+
+/*
+ * ===========================================================================
+ * Register merge
+ * ===========================================================================
+ */
+
+/*
+ * Compute the "class depth" of a class. This is the distance from the
+ * class to the top of the tree, chasing superclass links. java.lang.Object
+ * has a class depth of 0.
+ */
+static int getClassDepth(ClassObject* clazz)
+{
+ int depth = 0;
+
+ while (clazz->super != NULL) {
+ clazz = clazz->super;
+ depth++;
+ }
+ return depth;
+}
+
+/*
+ * Given two classes, walk up the superclass tree to find a common
+ * ancestor. (Called from findCommonSuperclass().)
+ *
+ * TODO: consider caching the class depth in the class object so we don't
+ * have to search for it here.
+ */
+static ClassObject* digForSuperclass(ClassObject* c1, ClassObject* c2)
+{
+ int depth1, depth2;
+
+ depth1 = getClassDepth(c1);
+ depth2 = getClassDepth(c2);
+
+ if (gDebugVerbose) {
+ LOGVV("COMMON: %s(%d) + %s(%d)\n",
+ c1->descriptor, depth1, c2->descriptor, depth2);
+ }
+
+ /* pull the deepest one up */
+ if (depth1 > depth2) {
+ while (depth1 > depth2) {
+ c1 = c1->super;
+ depth1--;
+ }
+ } else {
+ while (depth2 > depth1) {
+ c2 = c2->super;
+ depth2--;
+ }
+ }
+
+ /* walk up in lock-step */
+ while (c1 != c2) {
+ c1 = c1->super;
+ c2 = c2->super;
+
+ assert(c1 != NULL && c2 != NULL);
+ }
+
+ if (gDebugVerbose) {
+ LOGVV(" : --> %s\n", c1->descriptor);
+ }
+ return c1;
+}
+
+/*
+ * Merge two array classes. We can't use the general "walk up to the
+ * superclass" merge because the superclass of an array is always Object.
+ * We want String[] + Integer[] = Object[]. This works for higher dimensions
+ * as well, e.g. String[][] + Integer[][] = Object[][].
+ *
+ * If Foo1 and Foo2 are subclasses of Foo, Foo1[] + Foo2[] = Foo[].
+ *
+ * If Class implements Type, Class[] + Type[] = Type[].
+ *
+ * If the dimensions don't match, we want to convert to an array of Object
+ * with the least dimension, e.g. String[][] + String[][][][] = Object[][].
+ *
+ * This gets a little awkward because we may have to ask the VM to create
+ * a new array type with the appropriate element and dimensions. However, we
+ * shouldn't be doing this often.
+ */
static ClassObject* findCommonArraySuperclass(ClassObject* c1, ClassObject* c2)
{
    ClassObject* arrayClass = NULL;
    ClassObject* commonElem;
    int i, numDims;

    assert(c1->arrayDim > 0);
    assert(c2->arrayDim > 0);

    if (c1->arrayDim == c2->arrayDim) {
        /* same rank: merge the element classes (mutual recursion) */
        //commonElem = digForSuperclass(c1->elementClass, c2->elementClass);
        commonElem = findCommonSuperclass(c1->elementClass, c2->elementClass);
        numDims = c1->arrayDim;
    } else {
        /* rank mismatch: result is Object[] at the lesser dimension */
        if (c1->arrayDim < c2->arrayDim)
            numDims = c1->arrayDim;
        else
            numDims = c2->arrayDim;
        commonElem = c1->super;     // == java.lang.Object
    }

    /* walk from the element to the (multi-)dimensioned array type */
    for (i = 0; i < numDims; i++) {
        arrayClass = dvmFindArrayClassForElement(commonElem);
        commonElem = arrayClass;
    }

    LOGVV("ArrayMerge '%s' + '%s' --> '%s'\n",
        c1->descriptor, c2->descriptor, arrayClass->descriptor);
    return arrayClass;
}
+
+/*
+ * Find the first common superclass of the two classes. We're not
+ * interested in common interfaces.
+ *
+ * The easiest way to do this for concrete classes is to compute the "class
+ * depth" of each, move up toward the root of the deepest one until they're
+ * at the same depth, then walk both up to the root until they match.
+ *
+ * If both classes are arrays of non-primitive types, we need to merge
+ * based on array depth and element type.
+ *
+ * If one class is an interface, we check to see if the other class/interface
+ * (or one of its predecessors) implements the interface. If so, we return
+ * the interface; otherwise, we return Object.
+ *
+ * NOTE: we continue the tradition of "lazy interface handling". To wit,
+ * suppose we have three classes:
+ * One implements Fancy, Free
+ * Two implements Fancy, Free
+ * Three implements Free
+ * where Fancy and Free are unrelated interfaces. The code requires us
+ * to merge One into Two. Ideally we'd use a common interface, which
+ * gives us a choice between Fancy and Free, and no guidance on which to
+ * use. If we use Free, we'll be okay when Three gets merged in, but if
+ * we choose Fancy, we're hosed. The "ideal" solution is to create a
+ * set of common interfaces and carry that around, merging further references
+ * into it. This is a pain. The easy solution is to simply boil them
+ * down to Objects and let the runtime invokeinterface call fail, which
+ * is what we do.
+ */
static ClassObject* findCommonSuperclass(ClassObject* c1, ClassObject* c2)
{
    assert(!dvmIsPrimitiveClass(c1) && !dvmIsPrimitiveClass(c2));

    /* trivial case: identical classes */
    if (c1 == c2)
        return c1;

    /* if one side is an interface the other implements, prefer the interface */
    if (dvmIsInterfaceClass(c1) && dvmImplements(c2, c1)) {
        if (gDebugVerbose)
            LOGVV("COMMON/I1: %s + %s --> %s\n",
                c1->descriptor, c2->descriptor, c1->descriptor);
        return c1;
    }
    if (dvmIsInterfaceClass(c2) && dvmImplements(c1, c2)) {
        if (gDebugVerbose)
            LOGVV("COMMON/I2: %s + %s --> %s\n",
                c1->descriptor, c2->descriptor, c2->descriptor);
        return c2;
    }

    /* two arrays of non-primitive elements merge by rank and element type */
    if (dvmIsArrayClass(c1) && dvmIsArrayClass(c2) &&
        !dvmIsPrimitiveClass(c1->elementClass) &&
        !dvmIsPrimitiveClass(c2->elementClass))
    {
        return findCommonArraySuperclass(c1, c2);
    }

    /* otherwise walk the superclass chains to the first common ancestor */
    return digForSuperclass(c1, c2);
}
+
+/*
+ * Merge two RegType values.
+ *
+ * Sets "*pChanged" to "true" if the result doesn't match "type1".
+ */
static RegType mergeTypes(RegType type1, RegType type2, bool* pChanged)
{
    RegType result;

    /*
     * Check for trivial case so we don't have to hit memory.
     */
    if (type1 == type2)
        return type1;

    /*
     * Use the table if we can, and reject any attempts to merge something
     * from the table with a reference type.
     *
     * The uninitialized table entry at index zero *will* show up as a
     * simple kRegTypeUninit value. Since this cannot be merged with
     * anything but itself, the rules do the right thing.
     */
    if (type1 < kRegTypeMAX) {
        if (type2 < kRegTypeMAX) {
            /* both are simple types: the merge table decides */
            result = gDvmMergeTab[type1][type2];
        } else {
            /* simple + reference == conflict, usually */
            if (type1 == kRegTypeZero)
                result = type2;     /* null constant merges into any ref */
            else
                result = kRegTypeConflict;
        }
    } else {
        if (type2 < kRegTypeMAX) {
            /* reference + simple == conflict, usually */
            if (type2 == kRegTypeZero)
                result = type1;     /* null constant merges into any ref */
            else
                result = kRegTypeConflict;
        } else {
            /* merging two references */
            if (regTypeIsUninitReference(type1) ||
                regTypeIsUninitReference(type2))
            {
                /* can't merge uninit with anything but self */
                result = kRegTypeConflict;
            } else {
                /* both initialized: merge to the common superclass */
                ClassObject* clazz1 = regTypeInitializedReferenceToClass(type1);
                ClassObject* clazz2 = regTypeInitializedReferenceToClass(type2);
                ClassObject* mergedClass;

                mergedClass = findCommonSuperclass(clazz1, clazz2);
                assert(mergedClass != NULL);
                result = regTypeFromClass(mergedClass);
            }
        }
    }

    /* caller uses *pChanged to decide whether to re-queue the instruction */
    if (result != type1)
        *pChanged = true;
    return result;
}
+
+/*
+ * Control can transfer to "nextInsn".
+ *
+ * Merge the registers from "workRegs" into "regTypes" at "nextInsn", and
+ * set the "changed" flag on the target address if the registers have changed.
+ */
+static void updateRegisters(const Method* meth, InsnFlags* insnFlags,
+ RegisterTable* regTable, int nextInsn, const RegType* workRegs)
+{
+ RegType* targetRegs = getRegisterLine(regTable, nextInsn);
+ const int insnRegCount = meth->registersSize;
+
+#if 0
+ if (!dvmInsnIsBranchTarget(insnFlags, nextInsn)) {
+ LOGE("insnFlags[0x%x]=0x%08x\n", nextInsn, insnFlags[nextInsn]);
+ LOGE(" In %s.%s %s\n",
+ meth->clazz->descriptor, meth->name, meth->descriptor);
+ assert(false);
+ }
+#endif
+
+ if (!dvmInsnIsVisitedOrChanged(insnFlags, nextInsn)) {
+ /*
+ * We haven't processed this instruction before, and we haven't
+ * touched the registers here, so there's nothing to "merge". Copy
+ * the registers over and mark it as changed. (This is the only
+ * way a register can transition out of "unknown", so this is not
+ * just an optimization.)
+ */
+ LOGVV("COPY into 0x%04x\n", nextInsn);
+ copyRegisters(targetRegs, workRegs, insnRegCount + kExtraRegs);
+ dvmInsnSetChanged(insnFlags, nextInsn, true);
+ } else {
+ if (gDebugVerbose) {
+ LOGVV("MERGE into 0x%04x\n", nextInsn);
+ //dumpRegTypes(meth, insnFlags, targetRegs, 0, "targ", NULL, 0);
+ //dumpRegTypes(meth, insnFlags, workRegs, 0, "work", NULL, 0);
+ }
+ /* merge registers, set Changed only if different */
+ bool changed = false;
+ int i;
+
+ for (i = 0; i < insnRegCount + kExtraRegs; i++) {
+ targetRegs[i] = mergeTypes(targetRegs[i], workRegs[i], &changed);
+ }
+
+ if (gDebugVerbose) {
+ //LOGI(" RESULT (changed=%d)\n", changed);
+ //dumpRegTypes(meth, insnFlags, targetRegs, 0, "rslt", NULL, 0);
+ }
+
+ if (changed)
+ dvmInsnSetChanged(insnFlags, nextInsn, true);
+ }
+}
+
+
+/*
+ * ===========================================================================
+ * Utility functions
+ * ===========================================================================
+ */
+
+/*
+ * Look up an instance field, specified by "fieldIdx", that is going to be
+ * accessed in object "objType". This resolves the field and then verifies
+ * that the class containing the field is an instance of the reference in
+ * "objType".
+ *
+ * It is possible for "objType" to be kRegTypeZero, meaning that we might
+ * have a null reference. This is a runtime problem, so we allow it,
+ * skipping some of the type checks.
+ *
+ * In general, "objType" must be an initialized reference. However, we
+ * allow it to be uninitialized if this is an "<init>" method and the field
+ * is declared within the "objType" class.
+ *
+ * Returns an InstField on success, returns NULL and sets "*pOkay" to false
+ * on failure.
+ */
/*
 * Look up an instance field, specified by "fieldIdx", that is going to be
 * accessed through a reference of type "objType". Resolves the field and
 * verifies that the class containing the field is compatible with the
 * reference.
 *
 * "objType" may be kRegTypeZero (possible null reference); that is a
 * runtime concern, so most checks are skipped in that case.
 *
 * An uninitialized "objType" is tolerated only inside an <init> method of
 * the same class, and only for fields declared directly by that class.
 *
 * Sets "*pOkay" to false on failure. NOTE(review): on failure paths taken
 * *after* resolution succeeds, the resolved (non-NULL) InstField is still
 * returned alongside *pOkay == false — callers must test *pOkay, not just
 * the return value; "returns NULL on failure" only holds for the first two
 * checks.
 */
static InstField* getInstField(const Method* meth,
    const UninitInstanceMap* uninitMap, RegType objType, int fieldIdx,
    bool* pOkay)
{
    InstField* instField = NULL;
    ClassObject* objClass;
    bool mustBeLocal = false;

    /* field access requires an object reference (or null) in the register */
    if (!regTypeIsReference(objType)) {
        LOG_VFY("VFY: attempt to access field of non-reference type %d\n",
            objType);
        *pOkay = false;
        goto bail;
    }

    instField = dvmOptResolveInstField(meth->clazz, fieldIdx);
    if (instField == NULL) {
        LOG_VFY("VFY: unable to resolve instance field %u\n", fieldIdx);
        *pOkay = false;
        goto bail;
    }

    /* null reference: defer to runtime NPE, skip the type checks below */
    if (objType == kRegTypeZero)
        goto bail;

    /*
     * Access to fields in uninitialized objects is allowed if this is
     * the <init> method for the object and the field in question is
     * declared by this class.
     */
    objClass = regTypeReferenceToClass(objType, uninitMap);
    assert(objClass != NULL);
    if (regTypeIsUninitReference(objType)) {
        if (!isInitMethod(meth) || meth->clazz != objClass) {
            LOG_VFY("VFY: attempt to access field via uninitialized ref\n");
            *pOkay = false;
            goto bail;
        }
        mustBeLocal = true;
    }

    /* the reference must be an instance of the field's declaring class */
    if (!dvmInstanceof(objClass, instField->field.clazz)) {
        LOG_VFY("VFY: invalid field access (field %s.%s, through %s ref)\n",
            instField->field.clazz->descriptor, instField->field.name,
            objClass->descriptor);
        *pOkay = false;
        goto bail;
    }

    if (mustBeLocal) {
        /*
         * For uninit ref, make sure it's defined by this class, not super.
         * Pointer-range test works because ifields is a contiguous array.
         */
        if (instField < objClass->ifields ||
            instField >= objClass->ifields + objClass->ifieldCount)
        {
            LOG_VFY("VFY: invalid constructor field access (field %s in %s)\n",
                instField->field.name, objClass->descriptor);
            *pOkay = false;
            goto bail;
        }
    }

bail:
    return instField;
}
+
+/*
+ * Look up a static field.
+ *
+ * Returns a StaticField on success, returns NULL and sets "*pOkay" to false
+ * on failure.
+ */
+static StaticField* getStaticField(const Method* meth, int fieldIdx,
+ bool* pOkay)
+{
+ StaticField* staticField;
+
+ staticField = dvmOptResolveStaticField(meth->clazz, fieldIdx);
+ if (staticField == NULL) {
+ DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+ const DexFieldId* pFieldId;
+
+ pFieldId = dexGetFieldId(pDexFile, fieldIdx);
+
+ LOG_VFY("VFY: unable to resolve static field %u (%s) in %s\n", fieldIdx,
+ dexStringById(pDexFile, pFieldId->nameIdx),
+ dexStringByTypeIdx(pDexFile, pFieldId->classIdx));
+
+ *pOkay = false;
+ goto bail;
+ }
+
+bail:
+ return staticField;
+}
+
+/*
 * If "field" is marked "final", make sure this is either <clinit>
 * or <init> as appropriate.
+ *
+ * Sets "*pOkay" to false on failure.
+ */
+static void checkFinalFieldAccess(const Method* meth, const Field* field,
+ bool* pOkay)
+{
+ if (!dvmIsFinalField(field))
+ return;
+
+ /* make sure we're in the same class */
+ if (meth->clazz != field->clazz) {
+ LOG_VFY_METH(meth, "VFY: can't modify final field %s.%s\n",
+ field->clazz->descriptor, field->name);
+ *pOkay = false;
+ return;
+ }
+
+ /*
+ * The EMMA code coverage tool generates a static method that
+ * modifies a private static final field. The method is only
+ * called by <clinit>, so the code is reasonable if not quite
+ * kosher. (Attempting to *compile* code that does something
+ * like that will earn you a quick thumbs-down from javac.)
+ *
+ * The verifier in another popular VM doesn't complain about this,
+ * so we're going to allow classes to modify their own static
+ * final fields outside of class initializers. Further testing
+ * showed that modifications to instance fields are also allowed.
+ */
+#if 0
+ /* make sure we're in the right kind of constructor */
+ if (dvmIsStaticField(field)) {
+ if (!isClassInitMethod(meth)) {
+ LOG_VFY_METH(meth,
+ "VFY: can't modify final static field outside <clinit>\n");
+ *pOkay = false;
+ }
+ } else {
+ if (!isInitMethod(meth)) {
+ LOG_VFY_METH(meth,
+ "VFY: can't modify final field outside <init>\n");
+ *pOkay = false;
+ }
+ }
+#endif
+}
+
+/*
+ * Make sure that the register type is suitable for use as an array index.
+ *
+ * Sets "*pOkay" to false if not.
+ */
+static void checkArrayIndexType(const Method* meth, RegType regType,
+ bool* pOkay)
+{
+ if (*pOkay) {
+ /*
+ * The 1nr types are interchangeable at this level. We could
+ * do something special if we can definitively identify it as a
+ * float, but there's no real value in doing so.
+ */
+ checkTypeCategory(regType, kTypeCategory1nr, pOkay);
+ if (!*pOkay) {
+ LOG_VFY_METH(meth, "Invalid reg type for array index (%d)\n",
+ regType);
+ }
+ }
+}
+
+/*
+ * Check constraints on constructor return. Specifically, make sure that
+ * the "this" argument got initialized.
+ *
+ * The "this" argument to <init> uses code offset kUninitThisArgAddr, which
+ * puts it at the start of the list in slot 0. If we see a register with
+ * an uninitialized slot 0 reference, we know it somehow didn't get
+ * initialized.
+ *
+ * Returns "true" if all is well.
+ */
+static bool checkConstructorReturn(const Method* meth, const RegType* insnRegs,
+ const int insnRegCount)
+{
+ int i;
+
+ if (!isInitMethod(meth))
+ return true;
+
+ RegType uninitThis = regTypeFromUninitIndex(kUninitThisArgSlot);
+
+ for (i = 0; i < insnRegCount; i++) {
+ if (insnRegs[i] == uninitThis) {
+ LOG_VFY("VFY: <init> returning without calling superclass init\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
+ * Verify that the target instruction is not "move-exception". It's important
+ * that the only way to execute a move-exception is as the first instruction
+ * of an exception handler.
+ *
+ * Returns "true" if all is well, "false" if the target instruction is
+ * move-exception.
+ */
+static bool checkMoveException(const Method* meth, int insnIdx,
+ const char* logNote)
+{
+ assert(insnIdx >= 0 && insnIdx < (int)dvmGetMethodInsnsSize(meth));
+
+ if ((meth->insns[insnIdx] & 0xff) == OP_MOVE_EXCEPTION) {
+ LOG_VFY("VFY: invalid use of move-exception\n");
+ return false;
+ }
+ return true;
+}
+
+/*
+ * For the "move-exception" instruction at "insnIdx", which must be at an
+ * exception handler address, determine the first common superclass of
+ * all exceptions that can land here. (For javac output, we're probably
+ * looking at multiple spans of bytecode covered by one "try" that lands
+ * at an exception-specific "catch", but in general the handler could be
+ * shared for multiple exceptions.)
+ *
+ * Returns NULL if no matching exception handler can be found, or if the
+ * exception is not a subclass of Throwable.
+ */
/*
 * For the "move-exception" instruction at "insnIdx", which must be at an
 * exception handler address, determine the first common superclass of
 * all exceptions that can land here. (For javac output, we're probably
 * looking at multiple spans of bytecode covered by one "try" that lands
 * at an exception-specific "catch", but in general the handler could be
 * shared for multiple exceptions.)
 *
 * Returns NULL if no handler in the method targets "insnIdx", or if none
 * of the handler classes could be resolved. Unresolvable classes are
 * logged and skipped rather than failing the whole computation.
 */
static ClassObject* getCaughtExceptionType(const Method* meth, int insnIdx)
{
    const DexCode* pCode;
    DexFile* pDexFile;
    ClassObject* commonSuper = NULL;
    u4 handlersSize;
    u4 offset;
    u4 i;

    pDexFile = meth->clazz->pDvmDex->pDexFile;
    pCode = dvmGetMethodCode(meth);

    /* a method with no "try" blocks has no handler list at all */
    if (pCode->triesSize != 0) {
        handlersSize = dexGetHandlersSize(pCode);
        offset = dexGetFirstHandlerOffset(pCode);
    } else {
        handlersSize = 0;
        offset = 0;
    }

    /* walk every handler list; "offset" is threaded from one to the next */
    for (i = 0; i < handlersSize; i++) {
        DexCatchIterator iterator;
        dexCatchIteratorInit(&iterator, pCode, offset);

        for (;;) {
            const DexCatchHandler* handler = dexCatchIteratorNext(&iterator);

            if (handler == NULL) {
                break;
            }

            /* only handlers that land exactly on "insnIdx" contribute */
            if (handler->address == (u4) insnIdx) {
                ClassObject* clazz;

                /* catch-all entries have no type index */
                if (handler->typeIdx == kDexNoIndex)
                    clazz = gDvm.classJavaLangThrowable;
                else
                    clazz = dvmOptResolveClass(meth->clazz, handler->typeIdx);

                if (clazz == NULL) {
                    LOG_VFY("VFY: unable to resolve exception class %u (%s)\n",
                        handler->typeIdx,
                        dexStringByTypeIdx(pDexFile, handler->typeIdx));
                } else {
                    /* fold this handler's type into the running join */
                    if (commonSuper == NULL)
                        commonSuper = clazz;
                    else
                        commonSuper = findCommonSuperclass(clazz, commonSuper);
                }
            }
        }

        offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
    }

    if (commonSuper == NULL) {
        LOG_VFY_METH(meth,
            "VFY: unable to find exception handler at addr 0x%x\n", insnIdx);
    }

    return commonSuper;
}
+
+/*
+ * Initialize the RegisterTable.
+ *
+ * Every instruction address can have a different set of information about
+ * what's in which register, but for verification purposes we only need to
+ * store it at branch target addresses (because we merge into that).
+ *
+ * By zeroing out the storage we are effectively initializing the register
+ * information to kRegTypeUnknown.
+ */
+static bool initRegisterTable(const Method* meth, const InsnFlags* insnFlags,
+ RegisterTable* regTable, RegisterTrackingMode trackRegsFor)
+{
+ const int insnsSize = dvmGetMethodInsnsSize(meth);
+ int i;
+
+ regTable->insnRegCountPlus = meth->registersSize + kExtraRegs;
+ regTable->addrRegs = (RegType**) calloc(insnsSize, sizeof(RegType*));
+ if (regTable->addrRegs == NULL)
+ return false;
+
+ assert(insnsSize > 0);
+
+ /*
+ * "All" means "every address that holds the start of an instruction".
+ * "Branches" and "GcPoints" mean just those addresses.
+ *
+ * "GcPoints" fills about half the addresses, "Branches" about 15%.
+ */
+ int interestingCount = 0;
+ //int insnCount = 0;
+
+ for (i = 0; i < insnsSize; i++) {
+ bool interesting;
+
+ switch (trackRegsFor) {
+ case kTrackRegsAll:
+ interesting = dvmInsnIsOpcode(insnFlags, i);
+ break;
+ case kTrackRegsGcPoints:
+ interesting = dvmInsnIsGcPoint(insnFlags, i) ||
+ dvmInsnIsBranchTarget(insnFlags, i);
+ break;
+ case kTrackRegsBranches:
+ interesting = dvmInsnIsBranchTarget(insnFlags, i);
+ break;
+ default:
+ dvmAbort();
+ return false;
+ }
+
+ if (interesting)
+ interestingCount++;
+
+ /* count instructions, for display only */
+ //if (dvmInsnIsOpcode(insnFlags, i))
+ // insnCount++;
+ }
+
+ regTable->regAlloc = (RegType*)
+ calloc(regTable->insnRegCountPlus * interestingCount, sizeof(RegType));
+ if (regTable->regAlloc == NULL)
+ return false;
+
+ RegType* regPtr = regTable->regAlloc;
+ for (i = 0; i < insnsSize; i++) {
+ bool interesting;
+
+ switch (trackRegsFor) {
+ case kTrackRegsAll:
+ interesting = dvmInsnIsOpcode(insnFlags, i);
+ break;
+ case kTrackRegsGcPoints:
+ interesting = dvmInsnIsGcPoint(insnFlags, i) ||
+ dvmInsnIsBranchTarget(insnFlags, i);
+ break;
+ case kTrackRegsBranches:
+ interesting = dvmInsnIsBranchTarget(insnFlags, i);
+ break;
+ default:
+ dvmAbort();
+ return false;
+ }
+
+ if (interesting) {
+ regTable->addrRegs[i] = regPtr;
+ regPtr += regTable->insnRegCountPlus;
+ }
+ }
+
+ //LOGD("Tracking registers for %d, total %d of %d(%d) (%d%%)\n",
+ // TRACK_REGS_FOR, interestingCount, insnCount, insnsSize,
+ // (interestingCount*100) / insnCount);
+
+ assert(regPtr - regTable->regAlloc ==
+ regTable->insnRegCountPlus * interestingCount);
+ assert(regTable->addrRegs[0] != NULL);
+ return true;
+}
+
+
+/*
+ * Verify that the arguments in a filled-new-array instruction are valid.
+ *
 * "resClass" is the class referred to by pDecInsn->vB.
+ */
+static void verifyFilledNewArrayRegs(const Method* meth,
+ const RegType* insnRegs, const int insnRegCount,
+ const DecodedInstruction* pDecInsn, ClassObject* resClass, bool isRange,
+ bool* pOkay)
+{
+ u4 argCount = pDecInsn->vA;
+ RegType expectedType;
+ PrimitiveType elemType;
+ unsigned int ui;
+
+ assert(dvmIsArrayClass(resClass));
+ elemType = resClass->elementClass->primitiveType;
+ if (elemType == PRIM_NOT) {
+ expectedType = regTypeFromClass(resClass->elementClass);
+ } else {
+ expectedType = primitiveTypeToRegType(elemType);
+ }
+ //LOGI("filled-new-array: %s -> %d\n", resClass->descriptor, expectedType);
+
+ /*
+ * Verify each register. If "argCount" is bad, verifyRegisterType()
+ * will run off the end of the list and fail. It's legal, if silly,
+ * for argCount to be zero.
+ */
+ for (ui = 0; ui < argCount; ui++) {
+ u4 getReg;
+
+ if (isRange)
+ getReg = pDecInsn->vC + ui;
+ else
+ getReg = pDecInsn->arg[ui];
+
+ verifyRegisterType(insnRegs, insnRegCount, getReg, expectedType, pOkay);
+ if (!*pOkay) {
+ LOG_VFY("VFY: filled-new-array arg %u(%u) not valid\n", ui, getReg);
+ return;
+ }
+ }
+}
+
+
+/*
+ * ===========================================================================
+ * Entry point and driver loop
+ * ===========================================================================
+ */
+
+/*
+ * Entry point for the detailed code-flow analysis.
+ */
+bool dvmVerifyCodeFlow(const Method* meth, InsnFlags* insnFlags,
+ UninitInstanceMap* uninitMap)
+{
+ bool result = false;
+ const int insnsSize = dvmGetMethodInsnsSize(meth);
+ const u2* insns = meth->insns;
+ const bool generateRegisterMap = gDvm.generateRegisterMaps;
+ int i, offset;
+ bool isConditional;
+ RegisterTable regTable;
+
+ memset(®Table, 0, sizeof(regTable));
+
+#ifndef NDEBUG
+ checkMergeTab(); // only need to do this if table gets updated
+#endif
+
+ /*
+ * We rely on these for verification of const-class, const-string,
+ * and throw instructions. Make sure we have them.
+ */
+ if (gDvm.classJavaLangClass == NULL)
+ gDvm.classJavaLangClass =
+ dvmFindSystemClassNoInit("Ljava/lang/Class;");
+ if (gDvm.classJavaLangString == NULL)
+ gDvm.classJavaLangString =
+ dvmFindSystemClassNoInit("Ljava/lang/String;");
+ if (gDvm.classJavaLangThrowable == NULL)
+ gDvm.classJavaLangThrowable =
+ dvmFindSystemClassNoInit("Ljava/lang/Throwable;");
+ if (gDvm.classJavaLangObject == NULL)
+ gDvm.classJavaLangObject =
+ dvmFindSystemClassNoInit("Ljava/lang/Object;");
+
+ if (meth->registersSize * insnsSize > 2*1024*1024) {
+ /* should probably base this on actual memory requirements */
+ LOG_VFY_METH(meth,
+ "VFY: arbitrarily rejecting large method (regs=%d count=%d)\n",
+ meth->registersSize, insnsSize);
+ goto bail;
+ }
+
+ /*
+ * Create register lists, and initialize them to "Unknown". If we're
+ * also going to create the register map, we need to retain the
+ * register lists for a larger set of addresses.
+ */
+ if (!initRegisterTable(meth, insnFlags, ®Table,
+ generateRegisterMap ? kTrackRegsGcPoints : kTrackRegsBranches))
+ goto bail;
+
+ /*
+ * Initialize the types of the registers that correspond to the
+ * method arguments. We can determine this from the method signature.
+ */
+ if (!setTypesFromSignature(meth, regTable.addrRegs[0], uninitMap))
+ goto bail;
+
+ /*
+ * Run the verifier.
+ */
+ if (!doCodeVerification(meth, insnFlags, ®Table, uninitMap))
+ goto bail;
+
+ /*
+ * Generate a register map.
+ */
+ if (generateRegisterMap) {
+ RegisterMap* pMap;
+ VerifierData vd;
+
+ vd.method = meth;
+ vd.insnsSize = insnsSize;
+ vd.insnRegCount = meth->registersSize;
+ vd.insnFlags = insnFlags;
+ vd.addrRegs = regTable.addrRegs;
+
+ pMap = dvmGenerateRegisterMapV(&vd);
+ if (pMap != NULL) {
+ /*
+ * Tuck it into the Method struct. It will either get used
+ * directly or, if we're in dexopt, will be packed up and
+ * appended to the DEX file.
+ */
+ dvmSetRegisterMap((Method*)meth, pMap);
+ }
+ }
+
+ /*
+ * Success.
+ */
+ result = true;
+
+bail:
+ free(regTable.addrRegs);
+ free(regTable.regAlloc);
+ return result;
+}
+
+/*
+ * Grind through the instructions.
+ *
+ * The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit
+ * on the first instruction, process it (setting additional "changed" bits),
+ * and repeat until there are no more.
+ *
+ * v3 4.11.1.1
+ * - (N/A) operand stack is always the same size
+ * - operand stack [registers] contain the correct types of values
+ * - local variables [registers] contain the correct types of values
+ * - methods are invoked with the appropriate arguments
+ * - fields are assigned using values of appropriate types
+ * - opcodes have the correct type values in operand registers
+ * - there is never an uninitialized class instance in a local variable in
+ * code protected by an exception handler (operand stack is okay, because
+ * the operand stack is discarded when an exception is thrown) [can't
+ * know what's a local var w/o the debug info -- should fall out of
+ * register typing]
+ *
+ * v3 4.11.1.2
+ * - execution cannot fall off the end of the code
+ *
+ * (We also do many of the items described in the "static checks" sections,
+ * because it's easier to do them here.)
+ *
+ * We need an array of RegType values, one per register, for every
+ * instruction. In theory this could become quite large -- up to several
+ * megabytes for a monster function. For self-preservation we reject
+ * anything that requires more than a certain amount of memory. (Typical
+ * "large" should be on the order of 4K code units * 8 registers.) This
+ * will likely have to be adjusted.
+ *
+ *
+ * The spec forbids backward branches when there's an uninitialized reference
+ * in a register. The idea is to prevent something like this:
+ * loop:
+ * move r1, r0
+ * new-instance r0, MyClass
+ * ...
+ * if-eq rN, loop // once
+ * initialize r0
+ *
+ * This leaves us with two different instances, both allocated by the
+ * same instruction, but only one is initialized. The scheme outlined in
+ * v3 4.11.1.4 wouldn't catch this, so they work around it by preventing
+ * backward branches. We achieve identical results without restricting
 * code reordering by specifying that you can't execute the new-instance
 * instruction if a register contains an uninitialized instance created
 * by that same instruction.
+ */
/*
 * Worklist driver: repeatedly picks an instruction whose "changed" flag is
 * set, verifies it (which may set "changed" on its successors), and clears
 * the flag, until a fixed point is reached. Optionally scans for dead code
 * afterwards.
 *
 * "regTable" supplies the stored register lines; "workRegs" (a VLA sized
 * registersSize + kExtraRegs) carries the live register state from one
 * instruction to the next.
 *
 * Returns "true" if the method verifies.
 */
static bool doCodeVerification(const Method* meth, InsnFlags* insnFlags,
    RegisterTable* regTable, UninitInstanceMap* uninitMap)
{
    const int insnsSize = dvmGetMethodInsnsSize(meth);
    const u2* insns = meth->insns;
    RegType workRegs[meth->registersSize + kExtraRegs];
    bool result = false;
    bool debugVerbose = false;
    /* NOTE(review): "insns" and "prevAddr" appear unused in this body */
    int insnIdx, startGuess, prevAddr;

    /*
     * Begin by marking the first instruction as "changed".
     */
    dvmInsnSetChanged(insnFlags, 0, true);

    if (doVerboseLogging(meth)) {
        IF_LOGI() {
            char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
            LOGI("Now verifying: %s.%s %s (ins=%d regs=%d)\n",
                meth->clazz->descriptor, meth->name, desc,
                meth->insSize, meth->registersSize);
            LOGI(" ------ [0 4 8 12 16 20 24 28 32 36\n");
            free(desc);
        }
        debugVerbose = true;
        gDebugVerbose = true;
    } else {
        gDebugVerbose = false;
    }

    startGuess = 0;

    /*
     * Continue until no instructions are marked "changed".
     */
    while (true) {
        /*
         * Find the first marked one. Use "startGuess" as a way to find
         * one quickly. (verifyInstruction() sets it to the most likely
         * next address, so in the common case this loop exits immediately.)
         */
        for (insnIdx = startGuess; insnIdx < insnsSize; insnIdx++) {
            if (dvmInsnIsChanged(insnFlags, insnIdx))
                break;
        }

        if (insnIdx == insnsSize) {
            if (startGuess != 0) {
                /* try again, starting from the top */
                startGuess = 0;
                continue;
            } else {
                /* all flags are clear */
                break;
            }
        }

        /*
         * We carry the working set of registers from instruction to
         * instruction. If this address can be the target of a branch
         * (or throw) instruction, or if we're skipping around chasing
         * "changed" flags, we need to load the set of registers from
         * the table.
         *
         * Because we always prefer to continue on to the next instruction,
         * we should never have a situation where we have a stray
         * "changed" flag set on an instruction that isn't a branch target.
         */
        if (dvmInsnIsBranchTarget(insnFlags, insnIdx)) {
            RegType* insnRegs = getRegisterLine(regTable, insnIdx);
            assert(insnRegs != NULL);
            copyRegisters(workRegs, insnRegs, meth->registersSize + kExtraRegs);

            if (debugVerbose) {
                dumpRegTypes(meth, insnFlags, workRegs, insnIdx, NULL,uninitMap,
                    SHOW_REG_DETAILS);
            }

        } else {
            if (debugVerbose) {
                dumpRegTypes(meth, insnFlags, workRegs, insnIdx, NULL,uninitMap,
                    SHOW_REG_DETAILS);
            }

#ifndef NDEBUG
            /*
             * Sanity check: retrieve the stored register line (assuming
             * a full table) and make sure it actually matches.
             */
            RegType* insnRegs = getRegisterLine(regTable, insnIdx);
            if (insnRegs != NULL &&
                compareRegisters(workRegs, insnRegs,
                    meth->registersSize + kExtraRegs) != 0)
            {
                char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
                LOG_VFY("HUH? workRegs diverged in %s.%s %s\n",
                    meth->clazz->descriptor, meth->name, desc);
                free(desc);
                dumpRegTypes(meth, insnFlags, workRegs, 0, "work",
                    uninitMap, DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS);
                dumpRegTypes(meth, insnFlags, insnRegs, 0, "insn",
                    uninitMap, DRT_SHOW_REF_TYPES | DRT_SHOW_LOCALS);
            }
#endif
        }

        //LOGI("process %s.%s %s %d\n",
        //    meth->clazz->descriptor, meth->name, meth->descriptor, insnIdx);
        if (!verifyInstruction(meth, insnFlags, regTable, workRegs, insnIdx,
                uninitMap, &startGuess))
        {
            //LOGD("+++ %s bailing at %d\n", meth->name, insnIdx);
            goto bail;
        }

#if 0
        {
            static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
                kInstrCanThrow | kInstrCanReturn;
            OpCode opCode = *(meth->insns + insnIdx) & 0xff;
            int flags = dexGetInstrFlags(gDvm.instrFlags, opCode);

            /* 8, 16, 32, or 32*n -bit regs */
            int regWidth = (meth->registersSize + 7) / 8;
            if (regWidth == 3)
                regWidth = 4;
            if (regWidth > 4) {
                regWidth = ((regWidth + 3) / 4) * 4;
                if (false) {
                    LOGW("WOW: %d regs -> %d %s.%s\n",
                        meth->registersSize, regWidth,
                        meth->clazz->descriptor, meth->name);
                    //x = true;
                }
            }

            if ((flags & gcMask) != 0) {
                /* this is a potential GC point */
                gDvm__gcInstr++;

                if (insnsSize < 256)
                    gDvm__gcData += 1;
                else
                    gDvm__gcData += 2;
                gDvm__gcData += regWidth;
            }
            gDvm__gcSimpleData += regWidth;

            gDvm__totalInstr++;
        }
#endif

        /*
         * Clear "changed" and mark as visited.
         */
        dvmInsnSetVisited(insnFlags, insnIdx, true);
        dvmInsnSetChanged(insnFlags, insnIdx, false);
    }

    if (DEAD_CODE_SCAN) {
        /*
         * Scan for dead code. There's nothing "evil" about dead code, but it
         * indicates a flaw somewhere down the line, possibly in the verifier.
         */
        int deadStart = -1;
        for (insnIdx = 0; insnIdx < insnsSize;
            insnIdx += dvmInsnGetWidth(insnFlags, insnIdx))
        {
            /*
             * Switch-statement data doesn't get "visited" by scanner. It
             * may or may not be preceded by a padding NOP.
             */
            int instr = meth->insns[insnIdx];
            if (instr == kPackedSwitchSignature ||
                instr == kSparseSwitchSignature ||
                instr == kArrayDataSignature ||
                (instr == OP_NOP &&
                 (meth->insns[insnIdx+1] == kPackedSwitchSignature ||
                  meth->insns[insnIdx+1] == kSparseSwitchSignature ||
                  meth->insns[insnIdx+1] == kArrayDataSignature)))
            {
                dvmInsnSetVisited(insnFlags, insnIdx, true);
            }

            if (!dvmInsnIsVisited(insnFlags, insnIdx)) {
                /* entering (or still inside) a dead region */
                if (deadStart < 0)
                    deadStart = insnIdx;
            } else if (deadStart >= 0) {
                /* just stepped off the end of a dead region: report it */
                IF_LOGD() {
                    char* desc =
                        dexProtoCopyMethodDescriptor(&meth->prototype);
                    LOGD("VFY: dead code 0x%04x-%04x in %s.%s %s\n",
                        deadStart, insnIdx-1,
                        meth->clazz->descriptor, meth->name, desc);
                    free(desc);
                }

                deadStart = -1;
            }
        }
        /* a dead region may run to the end of the method */
        if (deadStart >= 0) {
            IF_LOGD() {
                char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
                LOGD("VFY: dead code 0x%04x-%04x in %s.%s %s\n",
                    deadStart, insnIdx-1,
                    meth->clazz->descriptor, meth->name, desc);
                free(desc);
            }
        }
    }

    result = true;

bail:
    return result;
}
+
+
+/*
+ * Perform verification for a single instruction.
+ *
+ * This requires fully decoding the instruction to determine the effect
+ * it has on registers.
+ *
+ * Finds zero or more following instructions and sets the "changed" flag
+ * if execution at that point needs to be (re-)evaluated. Register changes
+ * are merged into "regTypes" at the target addresses. Does not set or
+ * clear any other flags in "insnFlags".
+ */
+static bool verifyInstruction(const Method* meth, InsnFlags* insnFlags,
+ RegisterTable* regTable, RegType* workRegs, int insnIdx,
+ UninitInstanceMap* uninitMap, int* pStartGuess)
+{
+ const int insnsSize = dvmGetMethodInsnsSize(meth);
+ const u2* insns = meth->insns + insnIdx;
+ bool result = false;
+
+ /*
+ * Once we finish decoding the instruction, we need to figure out where
+ * we can go from here. There are three possible ways to transfer
+ * control to another statement:
+ *
+ * (1) Continue to the next instruction. Applies to all but
+ * unconditional branches, method returns, and exception throws.
+ * (2) Branch to one or more possible locations. Applies to branches
+ * and switch statements.
+ * (3) Exception handlers. Applies to any instruction that can
+ * throw an exception that is handled by an encompassing "try"
+ * block. (We simplify this to be any instruction that can
+ * throw any exception.)
+ *
+ * We can also return, in which case there is no successor instruction
+ * from this point.
+ *
+ * The behavior can be determined from the InstrFlags.
+ */
+
+ const DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+ RegType entryRegs[meth->registersSize + kExtraRegs];
+ ClassObject* resClass;
+ const char* className;
+ int branchTarget = 0;
+ const int insnRegCount = meth->registersSize;
+ RegType tmpType;
+ DecodedInstruction decInsn;
+ bool justSetResult = false;
+ bool okay = true;
+
+#ifndef NDEBUG
+ memset(&decInsn, 0x81, sizeof(decInsn));
+#endif
+ dexDecodeInstruction(gDvm.instrFormat, insns, &decInsn);
+
+ const int nextFlags = dexGetInstrFlags(gDvm.instrFlags, decInsn.opCode);
+
+ /*
+ * Make a copy of the previous register state. If the instruction
+ * throws an exception, we merge *this* into the destination rather
+ * than workRegs, because we don't want the result from the "successful"
+ * code path (e.g. a check-cast that "improves" a type) to be visible
+ * to the exception handler.
+ */
+ if ((nextFlags & kInstrCanThrow) != 0 && dvmInsnIsInTry(insnFlags, insnIdx))
+ {
+ copyRegisters(entryRegs, workRegs, meth->registersSize + kExtraRegs);
+ } else {
+#ifndef NDEBUG
+ memset(entryRegs, 0xdd,
+ (meth->registersSize + kExtraRegs) * sizeof(RegType));
+#endif
+ }
+
+ switch (decInsn.opCode) {
+ case OP_NOP:
+ /*
+ * A "pure" NOP has no effect on anything. Data tables start with
+ * a signature that looks like a NOP; if we see one of these in
+ * the course of executing code then we have a problem.
+ */
+ if (decInsn.vA != 0) {
+ LOG_VFY("VFY: encountered data table in instruction stream\n");
+ okay = false;
+ }
+ break;
+
+ case OP_MOVE:
+ case OP_MOVE_FROM16:
+ case OP_MOVE_16:
+ copyRegister1(workRegs, insnRegCount, decInsn.vA, decInsn.vB,
+ kTypeCategory1nr, &okay);
+ break;
+ case OP_MOVE_WIDE:
+ case OP_MOVE_WIDE_FROM16:
+ case OP_MOVE_WIDE_16:
+ copyRegister2(workRegs, insnRegCount, decInsn.vA, decInsn.vB, &okay);
+ break;
+ case OP_MOVE_OBJECT:
+ case OP_MOVE_OBJECT_FROM16:
+ case OP_MOVE_OBJECT_16:
+ copyRegister1(workRegs, insnRegCount, decInsn.vA, decInsn.vB,
+ kTypeCategoryRef, &okay);
+ break;
+
+ /*
+ * The move-result instructions copy data out of a "pseudo-register"
+ * with the results from the last method invocation. In practice we
+ * might want to hold the result in an actual CPU register, so the
+ * Dalvik spec requires that these only appear immediately after an
+ * invoke or filled-new-array.
+ *
+ * These calls invalidate the "result" register. (This is now
+ * redundant with the reset done below, but it can make the debug info
+ * easier to read in some cases.)
+ */
+ case OP_MOVE_RESULT:
+ copyResultRegister1(workRegs, insnRegCount, decInsn.vA,
+ kTypeCategory1nr, &okay);
+ break;
+ case OP_MOVE_RESULT_WIDE:
+ copyResultRegister2(workRegs, insnRegCount, decInsn.vA, &okay);
+ break;
+ case OP_MOVE_RESULT_OBJECT:
+ copyResultRegister1(workRegs, insnRegCount, decInsn.vA,
+ kTypeCategoryRef, &okay);
+ break;
+
+ case OP_MOVE_EXCEPTION:
+ /*
+ * This statement can only appear as the first instruction in an
+ * exception handler (though not all exception handlers need to
+ * have one of these). We verify that as part of extracting the
+ * exception type from the catch block list.
+ *
+ * "resClass" will hold the closest common superclass of all
+ * exceptions that can be handled here.
+ */
+ resClass = getCaughtExceptionType(meth, insnIdx);
+ if (resClass == NULL) {
+ okay = false;
+ } else {
+ setRegisterType(workRegs, insnRegCount, decInsn.vA,
+ regTypeFromClass(resClass), &okay);
+ }
+ break;
+
+    /*
+     * All return forms first run checkConstructorReturn(), then verify
+     * both halves of the contract: the method signature must declare a
+     * return of this category, and the returned register(s) must hold a
+     * compatible value.
+     */
+    case OP_RETURN_VOID:
+        okay = checkConstructorReturn(meth, workRegs, insnRegCount);
+        /* kRegTypeUnknown is used here as the "void" return type marker */
+        if (okay && getMethodReturnType(meth) != kRegTypeUnknown) {
+            LOG_VFY("VFY: return-void not expected\n");
+            okay = false;
+        }
+        break;
+    case OP_RETURN:
+        okay = checkConstructorReturn(meth, workRegs, insnRegCount);
+        if (okay) {
+            /* check the method signature */
+            RegType returnType = getMethodReturnType(meth);
+            checkTypeCategory(returnType, kTypeCategory1nr, &okay);
+            if (!okay)
+                LOG_VFY("VFY: return-32 not expected\n");
+
+            /* check the register contents */
+            returnType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                &okay);
+            checkTypeCategory(returnType, kTypeCategory1nr, &okay);
+            if (!okay)
+                LOG_VFY("VFY: return-32 on invalid register v%d\n", decInsn.vA);
+        }
+        break;
+    case OP_RETURN_WIDE:
+        okay = checkConstructorReturn(meth, workRegs, insnRegCount);
+        if (okay) {
+            RegType returnType, returnTypeHi;
+
+            /* check the method signature */
+            returnType = getMethodReturnType(meth);
+            checkTypeCategory(returnType, kTypeCategory2, &okay);
+            if (!okay)
+                LOG_VFY("VFY: return-wide not expected\n");
+
+            /* check the register contents; wide values live in vA/vA+1 */
+            returnType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                &okay);
+            returnTypeHi = getRegisterType(workRegs, insnRegCount,
+                decInsn.vA +1, &okay);
+            if (okay) {
+                checkTypeCategory(returnType, kTypeCategory2, &okay);
+                checkWidePair(returnType, returnTypeHi, &okay);
+            }
+            if (!okay) {
+                LOG_VFY("VFY: return-wide on invalid register pair v%d\n",
+                    decInsn.vA);
+            }
+        }
+        break;
+    case OP_RETURN_OBJECT:
+        okay = checkConstructorReturn(meth, workRegs, insnRegCount);
+        if (okay) {
+            RegType returnType = getMethodReturnType(meth);
+            checkTypeCategory(returnType, kTypeCategoryRef, &okay);
+            if (!okay) {
+                LOG_VFY("VFY: return-object not expected\n");
+                break;
+            }
+
+            /* returnType is the *expected* return type, not register value */
+            assert(returnType != kRegTypeZero);
+            assert(!regTypeIsUninitReference(returnType));
+
+            /*
+             * Verify that the reference in vAA is an instance of the type
+             * in "returnType".  The Zero type is allowed here.  If the
+             * method is declared to return an interface, then any
+             * initialized reference is acceptable.
+             *
+             * Note getClassFromRegister fails if the register holds an
+             * uninitialized reference, so we do not allow them to be
+             * returned.
+             */
+            ClassObject* declClass;
+
+            declClass = regTypeInitializedReferenceToClass(returnType);
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                decInsn.vA, &okay);
+            if (!okay)
+                break;
+            /* resClass == NULL means the register holds the Zero (null) type */
+            if (resClass != NULL) {
+                if (!dvmIsInterfaceClass(declClass) &&
+                    !dvmInstanceof(resClass, declClass))
+                {
+                    LOG_VFY("VFY: returning %s, declared %s\n",
+                        resClass->descriptor, declClass->descriptor);
+                    okay = false;
+                    break;
+                }
+            }
+        }
+        break;
+
+    case OP_CONST_4:
+    case OP_CONST_16:
+    case OP_CONST:
+        /* could be boolean, int, float, or a null reference */
+        setRegisterType(workRegs, insnRegCount, decInsn.vA,
+            dvmDetermineCat1Const((s4)decInsn.vB), &okay);
+        break;
+    case OP_CONST_HIGH16:
+        /* could be boolean, int, float, or a null reference */
+        setRegisterType(workRegs, insnRegCount, decInsn.vA,
+            dvmDetermineCat1Const((s4) decInsn.vB << 16), &okay);
+        break;
+    case OP_CONST_WIDE_16:
+    case OP_CONST_WIDE_32:
+    case OP_CONST_WIDE:
+    case OP_CONST_WIDE_HIGH16:
+        /* could be long or double; default to long and allow conversion */
+        setRegisterType(workRegs, insnRegCount, decInsn.vA,
+            kRegTypeLongLo, &okay);
+        break;
+    case OP_CONST_STRING:
+    case OP_CONST_STRING_JUMBO:
+        /* result is always a java.lang.String reference */
+        assert(gDvm.classJavaLangString != NULL);
+        setRegisterType(workRegs, insnRegCount, decInsn.vA,
+            regTypeFromClass(gDvm.classJavaLangString), &okay);
+        break;
+    case OP_CONST_CLASS:
+        assert(gDvm.classJavaLangClass != NULL);
+        /* make sure we can resolve the class; access check is important */
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vB);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vB);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve const-class %d (%s) in %s\n",
+                decInsn.vB, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else {
+            /* result register holds a java.lang.Class, not resClass itself */
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                regTypeFromClass(gDvm.classJavaLangClass), &okay);
+        }
+        break;
+
+    case OP_MONITOR_ENTER:
+    case OP_MONITOR_EXIT:
+        /* operand must be an object reference (null included) */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (okay && !regTypeIsReference(tmpType)) {
+            LOG_VFY("VFY: monitor op on non-object\n");
+            okay = false;
+        }
+        break;
+
+    case OP_CHECK_CAST:
+        /*
+         * If this instruction succeeds, we will promote register vA to
+         * the type in vB.  (This could be a demotion -- not expected, so
+         * we don't try to address it.)
+         *
+         * If it fails, an exception is thrown, which we deal with later
+         * by ignoring the update to decInsn.vA when branching to a handler.
+         */
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vB);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vB);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve check-cast %d (%s) in %s\n",
+                decInsn.vB, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else {
+            RegType origType;
+
+            /* the value being cast must already be some kind of reference */
+            origType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                &okay);
+            if (!okay)
+                break;
+            if (!regTypeIsReference(origType)) {
+                LOG_VFY("VFY: check-cast on non-reference in v%u\n",decInsn.vA);
+                okay = false;
+                break;
+            }
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                regTypeFromClass(resClass), &okay);
+        }
+        break;
+    case OP_INSTANCE_OF:
+        /* make sure we're checking a reference type */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vB, &okay);
+        if (!okay)
+            break;
+        if (!regTypeIsReference(tmpType)) {
+            LOG_VFY("VFY: vB not a reference (%d)\n", tmpType);
+            okay = false;
+            break;
+        }
+
+        /* make sure we can resolve the class; access check is important */
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vC);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vC);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve instanceof %d (%s) in %s\n",
+                decInsn.vC, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else {
+            /* result is boolean */
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                kRegTypeBoolean, &okay);
+        }
+        break;
+
+    case OP_ARRAY_LENGTH:
+        /* vB must hold an array reference (or Zero); result is an int */
+        resClass = getClassFromRegister(workRegs, insnRegCount,
+                        decInsn.vB, &okay);
+        if (!okay)
+            break;
+        /* resClass == NULL (Zero type) is allowed; it throws at runtime */
+        if (resClass != NULL && !dvmIsArrayClass(resClass)) {
+            LOG_VFY("VFY: array-length on non-array\n");
+            okay = false;
+            break;
+        }
+        setRegisterType(workRegs, insnRegCount, decInsn.vA, kRegTypeInteger,
+            &okay);
+        break;
+
+    case OP_NEW_INSTANCE:
+        /*
+         * We can check for interface and abstract classes here, but we
+         * can't reject them.  We can ask the optimizer to replace the
+         * instructions with a magic "always throw InstantiationError"
+         * instruction.  (Not enough bytes to sub in a method call.)
+         */
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vB);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vB);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve new-instance %d (%s) in %s\n",
+                decInsn.vB, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else {
+            RegType uninitType;
+
+            /* add resolved class to uninit map if not already there */
+            int uidx = dvmSetUninitInstance(uninitMap, insnIdx, resClass);
+            assert(uidx >= 0);
+            uninitType = regTypeFromUninitIndex(uidx);
+
+            /*
+             * Any registers holding previous allocations from this address
+             * that have not yet been initialized must be marked invalid.
+             */
+            markUninitRefsAsInvalid(workRegs, insnRegCount, uninitMap,
+                uninitType);
+
+            /* add the new uninitialized reference to the register set */
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                uninitType, &okay);
+        }
+        break;
+    case OP_NEW_ARRAY:
+        /* vC holds the array type; vB holds the length */
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vC);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vC);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve new-array %d (%s) in %s\n",
+                decInsn.vC, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else if (!dvmIsArrayClass(resClass)) {
+            LOG_VFY("VFY: new-array on non-array class\n");
+            okay = false;
+        } else {
+            /* make sure "size" register is valid type */
+            verifyRegisterType(workRegs, insnRegCount, decInsn.vB,
+                kRegTypeInteger, &okay);
+            /* set register type to array class */
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                regTypeFromClass(resClass), &okay);
+        }
+        break;
+    case OP_FILLED_NEW_ARRAY:
+    case OP_FILLED_NEW_ARRAY_RANGE:
+        resClass = dvmOptResolveClass(meth->clazz, decInsn.vB);
+        if (resClass == NULL) {
+            const char* badClassDesc = dexStringByTypeIdx(pDexFile, decInsn.vB);
+            dvmLogUnableToResolveClass(badClassDesc, meth);
+            LOG_VFY("VFY: unable to resolve filled-array %d (%s) in %s\n",
+                decInsn.vB, badClassDesc, meth->clazz->descriptor);
+            okay = false;
+        } else if (!dvmIsArrayClass(resClass)) {
+            LOG_VFY("VFY: filled-new-array on non-array class\n");
+            okay = false;
+        } else {
+            bool isRange = (decInsn.opCode == OP_FILLED_NEW_ARRAY_RANGE);
+
+            /* check the arguments to the instruction */
+            verifyFilledNewArrayRegs(meth, workRegs, insnRegCount, &decInsn,
+                resClass, isRange, &okay);
+            /* filled-array result goes into "result" register */
+            setResultRegisterType(workRegs, insnRegCount,
+                regTypeFromClass(resClass), &okay);
+            justSetResult = true;
+        }
+        break;
+
+    /*
+     * Comparisons: both source registers must hold the stated type;
+     * the result (-1/0/1) is stored as a small int, typed boolean here.
+     */
+    case OP_CMPL_FLOAT:
+    case OP_CMPG_FLOAT:
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vB, kRegTypeFloat,
+            &okay);
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vC, kRegTypeFloat,
+            &okay);
+        setRegisterType(workRegs, insnRegCount, decInsn.vA, kRegTypeBoolean,
+            &okay);
+        break;
+    case OP_CMPL_DOUBLE:
+    case OP_CMPG_DOUBLE:
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vB, kRegTypeDoubleLo,
+            &okay);
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vC, kRegTypeDoubleLo,
+            &okay);
+        setRegisterType(workRegs, insnRegCount, decInsn.vA, kRegTypeBoolean,
+            &okay);
+        break;
+    case OP_CMP_LONG:
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vB, kRegTypeLongLo,
+            &okay);
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vC, kRegTypeLongLo,
+            &okay);
+        setRegisterType(workRegs, insnRegCount, decInsn.vA, kRegTypeBoolean,
+            &okay);
+        break;
+
+    case OP_THROW:
+        /* thrown value must be a Throwable instance (or null / Zero) */
+        resClass = getClassFromRegister(workRegs, insnRegCount,
+                        decInsn.vA, &okay);
+        if (okay && resClass != NULL) {
+            if (!dvmInstanceof(resClass, gDvm.classJavaLangThrowable)) {
+                LOG_VFY("VFY: thrown class %s not instanceof Throwable\n",
+                    resClass->descriptor);
+                okay = false;
+            }
+        }
+        break;
+
+    case OP_GOTO:
+    case OP_GOTO_16:
+    case OP_GOTO_32:
+        /* no effect on or use of registers */
+        break;
+
+    case OP_PACKED_SWITCH:
+    case OP_SPARSE_SWITCH:
+        /* verify that vAA is an integer, or can be converted to one */
+        verifyRegisterType(workRegs, insnRegCount, decInsn.vA,
+            kRegTypeInteger, &okay);
+        break;
+
+    case OP_FILL_ARRAY_DATA:
+        {
+            RegType valueType;
+            const u2 *arrayData;
+            u2 elemWidth;
+
+            /* Similar to the verification done for APUT */
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vA, &okay);
+            if (!okay)
+                break;
+
+            /* resClass can be null if the reg type is Zero */
+            if (resClass == NULL)
+                break;
+
+            /* target must be a one-dimensional array of non-void primitives */
+            if (!dvmIsArrayClass(resClass) || resClass->arrayDim != 1 ||
+                resClass->elementClass->primitiveType == PRIM_NOT ||
+                resClass->elementClass->primitiveType == PRIM_VOID)
+            {
+                LOG_VFY("VFY: invalid fill-array-data on %s\n",
+                        resClass->descriptor);
+                okay = false;
+                break;
+            }
+
+            valueType = primitiveTypeToRegType(
+                                    resClass->elementClass->primitiveType);
+            assert(valueType != kRegTypeUnknown);
+
+            /*
+             * Now verify if the element width in the table matches the element
+             * width declared in the array.
+             */
+            /* instruction words 1-2 hold a signed 32-bit offset to the table */
+            arrayData = insns + (insns[1] | (((s4)insns[2]) << 16));
+            if (arrayData[0] != kArrayDataSignature) {
+                LOG_VFY("VFY: invalid magic for array-data\n");
+                okay = false;
+                break;
+            }
+
+            /* element width implied by the array's primitive element type */
+            switch (resClass->elementClass->primitiveType) {
+                case PRIM_BOOLEAN:
+                case PRIM_BYTE:
+                    elemWidth = 1;
+                    break;
+                case PRIM_CHAR:
+                case PRIM_SHORT:
+                    elemWidth = 2;
+                    break;
+                case PRIM_FLOAT:
+                case PRIM_INT:
+                    elemWidth = 4;
+                    break;
+                case PRIM_DOUBLE:
+                case PRIM_LONG:
+                    elemWidth = 8;
+                    break;
+                default:
+                    elemWidth = 0;
+                    break;
+            }
+
+            /*
+             * Since we don't compress the data in Dex, expect to see equal
+             * width of data stored in the table and expected from the array
+             * class.
+             */
+            if (arrayData[1] != elemWidth) {
+                LOG_VFY("VFY: array-data size mismatch (%d vs %d)\n",
+                        arrayData[1], elemWidth);
+                okay = false;
+            }
+        }
+        break;
+
+    case OP_IF_EQ:
+    case OP_IF_NE:
+        {
+            /*
+             * Two-operand equality tests may compare either two references
+             * or two category-1 values, but not a mix of the two.
+             */
+            RegType type1, type2;
+
+            type1 = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+            type2 = getRegisterType(workRegs, insnRegCount, decInsn.vB, &okay);
+            if (!okay)
+                break;
+
+            /* both references? */
+            if (regTypeIsReference(type1) && regTypeIsReference(type2))
+                break;
+
+            /* both category-1nr? */
+            checkTypeCategory(type1, kTypeCategory1nr, &okay);
+            checkTypeCategory(type2, kTypeCategory1nr, &okay);
+            if (!okay) {
+                LOG_VFY("VFY: args to if-eq/if-ne must both be refs or cat1\n");
+                break;
+            }
+        }
+        break;
+    case OP_IF_LT:
+    case OP_IF_GE:
+    case OP_IF_GT:
+    case OP_IF_LE:
+        /* ordered comparisons: both operands must be category-1 values */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (!okay)
+            break;
+        checkTypeCategory(tmpType, kTypeCategory1nr, &okay);
+        if (!okay) {
+            LOG_VFY("VFY: args to 'if' must be cat-1nr\n");
+            break;
+        }
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vB,&okay);
+        if (!okay)
+            break;
+        checkTypeCategory(tmpType, kTypeCategory1nr, &okay);
+        if (!okay) {
+            LOG_VFY("VFY: args to 'if' must be cat-1nr\n");
+            break;
+        }
+        break;
+    case OP_IF_EQZ:
+    case OP_IF_NEZ:
+        /* zero-tests accept either a reference (null check) or cat-1 value */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (!okay)
+            break;
+        if (regTypeIsReference(tmpType))
+            break;
+        checkTypeCategory(tmpType, kTypeCategory1nr, &okay);
+        if (!okay)
+            LOG_VFY("VFY: expected cat-1 arg to if\n");
+        break;
+    case OP_IF_LTZ:
+    case OP_IF_GEZ:
+    case OP_IF_GTZ:
+    case OP_IF_LEZ:
+        /* ordered zero-tests only make sense on category-1 values */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (!okay)
+            break;
+        checkTypeCategory(tmpType, kTypeCategory1nr, &okay);
+        if (!okay)
+            LOG_VFY("VFY: expected cat-1 arg to if\n");
+        break;
+
+    case OP_AGET:
+        tmpType = kRegTypeInteger;
+        goto aget_1nr_common;
+    case OP_AGET_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto aget_1nr_common;
+    case OP_AGET_BYTE:
+        tmpType = kRegTypeByte;
+        goto aget_1nr_common;
+    case OP_AGET_CHAR:
+        tmpType = kRegTypeChar;
+        goto aget_1nr_common;
+    case OP_AGET_SHORT:
+        tmpType = kRegTypeShort;
+        goto aget_1nr_common;
+aget_1nr_common:
+        /* shared tail: tmpType holds the element type the opcode expects */
+        {
+            RegType srcType, indexType;
+
+            indexType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                            &okay);
+            checkArrayIndexType(meth, indexType, &okay);
+            if (!okay)
+                break;
+
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vB, &okay);
+            if (!okay)
+                break;
+            /* resClass == NULL means Zero (null) array ref; fails at runtime */
+            if (resClass != NULL) {
+                /* verify the class */
+                if (!dvmIsArrayClass(resClass) || resClass->arrayDim != 1 ||
+                    resClass->elementClass->primitiveType == PRIM_NOT)
+                {
+                    LOG_VFY("VFY: invalid aget-1nr target %s\n",
+                        resClass->descriptor);
+                    okay = false;
+                    break;
+                }
+
+                /* make sure array type matches instruction */
+                srcType = primitiveTypeToRegType(
+                                        resClass->elementClass->primitiveType);
+
+                if (!checkFieldArrayStore1nr(tmpType, srcType)) {
+                    LOG_VFY("VFY: invalid aget-1nr, array type=%d with"
+                            " inst type=%d (on %s)\n",
+                        srcType, tmpType, resClass->descriptor);
+                    okay = false;
+                    break;
+                }
+
+            }
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                tmpType, &okay);
+        }
+        break;
+
+    case OP_AGET_WIDE:
+        {
+            RegType dstType, indexType;
+
+            indexType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                            &okay);
+            checkArrayIndexType(meth, indexType, &okay);
+            if (!okay)
+                break;
+
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vB, &okay);
+            if (!okay)
+                break;
+            if (resClass != NULL) {
+                /* verify the class */
+                if (!dvmIsArrayClass(resClass) || resClass->arrayDim != 1 ||
+                    resClass->elementClass->primitiveType == PRIM_NOT)
+                {
+                    LOG_VFY("VFY: invalid aget-wide target %s\n",
+                        resClass->descriptor);
+                    okay = false;
+                    break;
+                }
+
+                /* try to refine "dstType" */
+                switch (resClass->elementClass->primitiveType) {
+                case PRIM_LONG:
+                    dstType = kRegTypeLongLo;
+                    break;
+                case PRIM_DOUBLE:
+                    dstType = kRegTypeDoubleLo;
+                    break;
+                default:
+                    LOG_VFY("VFY: invalid aget-wide on %s\n",
+                        resClass->descriptor);
+                    dstType = kRegTypeUnknown;
+                    okay = false;
+                    break;
+                }
+            } else {
+                /*
+                 * Null array ref; this code path will fail at runtime.  We
+                 * know this is either long or double, and we don't really
+                 * discriminate between those during verification, so we
+                 * call it a long.
+                 */
+                dstType = kRegTypeLongLo;
+            }
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                dstType, &okay);
+        }
+        break;
+
+    case OP_AGET_OBJECT:
+        {
+            RegType dstType, indexType;
+
+            indexType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                            &okay);
+            checkArrayIndexType(meth, indexType, &okay);
+            if (!okay)
+                break;
+
+            /* get the class of the array we're pulling an object from */
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vB, &okay);
+            if (!okay)
+                break;
+            if (resClass != NULL) {
+                ClassObject* elementClass;
+
+                assert(resClass != NULL);
+                if (!dvmIsArrayClass(resClass)) {
+                    LOG_VFY("VFY: aget-object on non-array class\n");
+                    okay = false;
+                    break;
+                }
+                assert(resClass->elementClass != NULL);
+
+                /*
+                 * Find the element class.  resClass->elementClass indicates
+                 * the basic type, which won't be what we want for a
+                 * multi-dimensional array.
+                 */
+                if (resClass->descriptor[1] == '[') {
+                    /* e.g. "[[I": element is itself an array, "[I" */
+                    assert(resClass->arrayDim > 1);
+                    elementClass = dvmFindArrayClass(&resClass->descriptor[1],
+                                        resClass->classLoader);
+                } else if (resClass->descriptor[1] == 'L') {
+                    /* e.g. "[Ljava/lang/String;": element is an object type */
+                    assert(resClass->arrayDim == 1);
+                    elementClass = resClass->elementClass;
+                } else {
+                    LOG_VFY("VFY: aget-object on non-ref array class (%s)\n",
+                        resClass->descriptor);
+                    okay = false;
+                    break;
+                }
+
+                dstType = regTypeFromClass(elementClass);
+            } else {
+                /*
+                 * The array reference is NULL, so the current code path will
+                 * throw an exception.  For proper merging with later code
+                 * paths, and correct handling of "if-eqz" tests on the
+                 * result of the array get, we want to treat this as a null
+                 * reference.
+                 */
+                dstType = kRegTypeZero;
+            }
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                dstType, &okay);
+        }
+        break;
+    case OP_APUT:
+        tmpType = kRegTypeInteger;
+        goto aput_1nr_common;
+    case OP_APUT_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto aput_1nr_common;
+    case OP_APUT_BYTE:
+        tmpType = kRegTypeByte;
+        goto aput_1nr_common;
+    case OP_APUT_CHAR:
+        tmpType = kRegTypeChar;
+        goto aput_1nr_common;
+    case OP_APUT_SHORT:
+        tmpType = kRegTypeShort;
+        goto aput_1nr_common;
+aput_1nr_common:
+        /* shared tail: tmpType holds the element type the opcode expects */
+        {
+            RegType srcType, dstType, indexType;
+
+            indexType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                            &okay);
+            checkArrayIndexType(meth, indexType, &okay);
+            if (!okay)
+                break;
+
+            /* make sure the source register has the correct type */
+            srcType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                            &okay);
+            if (!canConvertTo1nr(srcType, tmpType)) {
+                LOG_VFY("VFY: invalid reg type %d on aput instr (need %d)\n",
+                    srcType, tmpType);
+                okay = false;
+                break;
+            }
+
+            resClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vB, &okay);
+            if (!okay)
+                break;
+
+            /* resClass can be null if the reg type is Zero */
+            if (resClass == NULL)
+                break;
+
+            if (!dvmIsArrayClass(resClass) || resClass->arrayDim != 1 ||
+                resClass->elementClass->primitiveType == PRIM_NOT)
+            {
+                LOG_VFY("VFY: invalid aput-1nr on %s\n", resClass->descriptor);
+                okay = false;
+                break;
+            }
+
+            /* verify that instruction matches array */
+            dstType = primitiveTypeToRegType(
+                                    resClass->elementClass->primitiveType);
+            assert(dstType != kRegTypeUnknown);
+
+            if (!checkFieldArrayStore1nr(tmpType, dstType)) {
+                LOG_VFY("VFY: invalid aput-1nr on %s (inst=%d dst=%d)\n",
+                        resClass->descriptor, tmpType, dstType);
+                okay = false;
+                break;
+            }
+        }
+        break;
+    case OP_APUT_WIDE:
+        /* index register must be an int */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                        &okay);
+        checkArrayIndexType(meth, tmpType, &okay);
+        if (!okay)
+            break;
+
+        /* source must be a properly-paired category-2 value in vA/vA+1 */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (okay) {
+            RegType typeHi =
+                getRegisterType(workRegs, insnRegCount, decInsn.vA+1, &okay);
+            checkTypeCategory(tmpType, kTypeCategory2, &okay);
+            checkWidePair(tmpType, typeHi, &okay);
+        }
+        if (!okay)
+            break;
+
+        resClass = getClassFromRegister(workRegs, insnRegCount,
+                        decInsn.vB, &okay);
+        if (!okay)
+            break;
+        if (resClass != NULL) {
+            /* verify the class and try to refine "dstType" */
+            if (!dvmIsArrayClass(resClass) || resClass->arrayDim != 1 ||
+                resClass->elementClass->primitiveType == PRIM_NOT)
+            {
+                LOG_VFY("VFY: invalid aput-wide on %s\n",
+                        resClass->descriptor);
+                okay = false;
+                break;
+            }
+
+            switch (resClass->elementClass->primitiveType) {
+            case PRIM_LONG:
+            case PRIM_DOUBLE:
+                /* these are okay */
+                break;
+            default:
+                LOG_VFY("VFY: invalid aput-wide on %s\n",
+                    resClass->descriptor);
+                okay = false;
+                break;
+            }
+        }
+        break;
+    case OP_APUT_OBJECT:
+        /* index register must be an int */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vC,
+                        &okay);
+        checkArrayIndexType(meth, tmpType, &okay);
+        if (!okay)
+            break;
+
+        /* get the ref we're storing; Zero is okay, Uninit is not */
+        resClass = getClassFromRegister(workRegs, insnRegCount,
+                        decInsn.vA, &okay);
+        if (!okay)
+            break;
+        if (resClass != NULL) {
+            ClassObject* arrayClass;
+            ClassObject* elementClass;
+
+            /*
+             * Get the array class.  If the array ref is null, we won't
+             * have type information (and we'll crash at runtime with a
+             * null pointer exception).
+             */
+            arrayClass = getClassFromRegister(workRegs, insnRegCount,
+                            decInsn.vB, &okay);
+
+            if (arrayClass != NULL) {
+                /* see if the array holds a compatible type */
+                if (!dvmIsArrayClass(arrayClass)) {
+                    LOG_VFY("VFY: invalid aput-object on %s\n",
+                            arrayClass->descriptor);
+                    okay = false;
+                    break;
+                }
+
+                /*
+                 * Find the element class.  resClass->elementClass indicates
+                 * the basic type, which won't be what we want for a
+                 * multi-dimensional array.
+                 *
+                 * All we want to check here is that the element type is a
+                 * reference class.  We *don't* check instanceof here, because
+                 * you can still put a String into a String[] after the latter
+                 * has been cast to an Object[].
+                 */
+                if (arrayClass->descriptor[1] == '[') {
+                    assert(arrayClass->arrayDim > 1);
+                    elementClass = dvmFindArrayClass(&arrayClass->descriptor[1],
+                                        arrayClass->classLoader);
+                } else {
+                    assert(arrayClass->arrayDim == 1);
+                    elementClass = arrayClass->elementClass;
+                }
+                if (elementClass->primitiveType != PRIM_NOT) {
+                    LOG_VFY("VFY: invalid aput-object of %s into %s\n",
+                            resClass->descriptor, arrayClass->descriptor);
+                    okay = false;
+                    break;
+                }
+            }
+        }
+        break;
+
+    case OP_IGET:
+        tmpType = kRegTypeInteger;
+        goto iget_1nr_common;
+    case OP_IGET_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto iget_1nr_common;
+    case OP_IGET_BYTE:
+        tmpType = kRegTypeByte;
+        goto iget_1nr_common;
+    case OP_IGET_CHAR:
+        tmpType = kRegTypeChar;
+        goto iget_1nr_common;
+    case OP_IGET_SHORT:
+        tmpType = kRegTypeShort;
+        goto iget_1nr_common;
+iget_1nr_common:
+        /* shared tail: tmpType holds the field type the opcode expects */
+        {
+            InstField* instField;
+            RegType objType, fieldType;
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+
+            /* make sure the field's type is compatible with expectation */
+            fieldType = primSigCharToRegType(instField->field.signature[0]);
+            if (fieldType == kRegTypeUnknown ||
+                !checkFieldArrayStore1nr(tmpType, fieldType))
+            {
+                LOG_VFY("VFY: invalid iget-1nr of %s.%s (inst=%d field=%d)\n",
+                        instField->field.clazz->descriptor,
+                        instField->field.name, tmpType, fieldType);
+                okay = false;
+                break;
+            }
+
+            setRegisterType(workRegs, insnRegCount, decInsn.vA, tmpType, &okay);
+        }
+        break;
+    case OP_IGET_WIDE:
+        {
+            RegType dstType;
+            InstField* instField;
+            RegType objType;
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+            /* check the type, which should be prim */
+            switch (instField->field.signature[0]) {
+            case 'D':
+                dstType = kRegTypeDoubleLo;
+                break;
+            case 'J':
+                dstType = kRegTypeLongLo;
+                break;
+            default:
+                LOG_VFY("VFY: invalid iget-wide of %s.%s\n",
+                        instField->field.clazz->descriptor,
+                        instField->field.name);
+                dstType = kRegTypeUnknown;
+                okay = false;
+                break;
+            }
+            if (okay) {
+                setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                    dstType, &okay);
+            }
+        }
+        break;
+    case OP_IGET_OBJECT:
+        {
+            ClassObject* fieldClass;
+            InstField* instField;
+            RegType objType;
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+            fieldClass = getFieldClass(meth, &instField->field);
+            if (fieldClass == NULL) {
+                /* class not found or primitive type */
+                LOG_VFY("VFY: unable to recover field class from '%s'\n",
+                    instField->field.signature);
+                okay = false;
+                break;
+            }
+            if (okay) {
+                assert(!dvmIsPrimitiveClass(fieldClass));
+                setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                    regTypeFromClass(fieldClass), &okay);
+            }
+        }
+        break;
+    case OP_IPUT:
+        tmpType = kRegTypeInteger;
+        goto iput_1nr_common;
+    case OP_IPUT_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto iput_1nr_common;
+    case OP_IPUT_BYTE:
+        tmpType = kRegTypeByte;
+        goto iput_1nr_common;
+    case OP_IPUT_CHAR:
+        tmpType = kRegTypeChar;
+        goto iput_1nr_common;
+    case OP_IPUT_SHORT:
+        tmpType = kRegTypeShort;
+        goto iput_1nr_common;
+iput_1nr_common:
+        /* shared tail: tmpType holds the field type the opcode expects */
+        {
+            RegType srcType, fieldType, objType;
+            InstField* instField;
+
+            /* make sure the source register has the correct type */
+            srcType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                        &okay);
+            if (!canConvertTo1nr(srcType, tmpType)) {
+                LOG_VFY("VFY: invalid reg type %d on iput instr (need %d)\n",
+                    srcType, tmpType);
+                okay = false;
+                break;
+            }
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+            /* writes to final fields are only allowed from the declaring class */
+            checkFinalFieldAccess(meth, &instField->field, &okay);
+            if (!okay)
+                break;
+
+            /* get type of field we're storing into */
+            fieldType = primSigCharToRegType(instField->field.signature[0]);
+            if (fieldType == kRegTypeUnknown ||
+                !checkFieldArrayStore1nr(tmpType, fieldType))
+            {
+                LOG_VFY("VFY: invalid iput-1nr of %s.%s (inst=%d field=%d)\n",
+                        instField->field.clazz->descriptor,
+                        instField->field.name, tmpType, fieldType);
+                okay = false;
+                break;
+            }
+        }
+        break;
+    case OP_IPUT_WIDE:
+        /* source must be a properly-paired category-2 value in vA/vA+1 */
+        tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+        if (okay) {
+            RegType typeHi =
+                getRegisterType(workRegs, insnRegCount, decInsn.vA+1, &okay);
+            checkTypeCategory(tmpType, kTypeCategory2, &okay);
+            checkWidePair(tmpType, typeHi, &okay);
+        }
+        if (okay) {
+            InstField* instField;
+            RegType objType;
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+            checkFinalFieldAccess(meth, &instField->field, &okay);
+            if (!okay)
+                break;
+
+            /* check the type, which should be prim */
+            switch (instField->field.signature[0]) {
+            case 'D':
+            case 'J':
+                /* these are okay (and interchangeable) */
+                break;
+            default:
+                LOG_VFY("VFY: invalid iput-wide of %s.%s\n",
+                        instField->field.clazz->descriptor,
+                        instField->field.name);
+                okay = false;
+                break;
+            }
+        }
+        break;
+    case OP_IPUT_OBJECT:
+        {
+            ClassObject* fieldClass;
+            ClassObject* valueClass;
+            InstField* instField;
+            RegType objType, valueType;
+
+            objType = getRegisterType(workRegs, insnRegCount, decInsn.vB,
+                        &okay);
+            if (!okay)
+                break;
+            instField = getInstField(meth, uninitMap, objType, decInsn.vC,
+                            &okay);
+            if (!okay)
+                break;
+            checkFinalFieldAccess(meth, &instField->field, &okay);
+            if (!okay)
+                break;
+
+            fieldClass = getFieldClass(meth, &instField->field);
+            if (fieldClass == NULL) {
+                LOG_VFY("VFY: unable to recover field class from '%s'\n",
+                    instField->field.signature);
+                okay = false;
+                break;
+            }
+
+            valueType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                        &okay);
+            if (!okay)
+                break;
+            if (!regTypeIsReference(valueType)) {
+                LOG_VFY("VFY: storing non-ref v%d into ref field '%s' (%s)\n",
+                        decInsn.vA, instField->field.name,
+                        fieldClass->descriptor);
+                okay = false;
+                break;
+            }
+            /* kRegTypeZero (null) may be stored into any reference field */
+            if (valueType != kRegTypeZero) {
+                valueClass = regTypeInitializedReferenceToClass(valueType);
+                if (valueClass == NULL) {
+                    LOG_VFY("VFY: storing uninit ref v%d into ref field\n",
+                        decInsn.vA);
+                    okay = false;
+                    break;
+                }
+                /* allow if field is any interface or field is base class */
+                if (!dvmIsInterfaceClass(fieldClass) &&
+                    !dvmInstanceof(valueClass, fieldClass))
+                {
+                    LOG_VFY("VFY: storing type '%s' into field type '%s' (%s.%s)\n",
+                            valueClass->descriptor, fieldClass->descriptor,
+                            instField->field.clazz->descriptor,
+                            instField->field.name);
+                    okay = false;
+                    break;
+                }
+            }
+        }
+        break;
+
+    case OP_SGET:
+        tmpType = kRegTypeInteger;
+        goto sget_1nr_common;
+    case OP_SGET_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto sget_1nr_common;
+    case OP_SGET_BYTE:
+        tmpType = kRegTypeByte;
+        goto sget_1nr_common;
+    case OP_SGET_CHAR:
+        tmpType = kRegTypeChar;
+        goto sget_1nr_common;
+    case OP_SGET_SHORT:
+        tmpType = kRegTypeShort;
+        goto sget_1nr_common;
+sget_1nr_common:
+        /* shared tail: tmpType holds the field type the opcode expects */
+        {
+            StaticField* staticField;
+            RegType fieldType;
+
+            staticField = getStaticField(meth, decInsn.vB, &okay);
+            if (!okay)
+                break;
+
+            /*
+             * Make sure the field's type is compatible with expectation.
+             * We can get ourselves into trouble if we mix & match loads
+             * and stores with different widths, so rather than just checking
+             * "canConvertTo1nr" we require that the field types have equal
+             * widths.  (We can't generally require an exact type match,
+             * because e.g. "int" and "float" are interchangeable.)
+             */
+            fieldType = primSigCharToRegType(staticField->field.signature[0]);
+            if (!checkFieldArrayStore1nr(tmpType, fieldType)) {
+                LOG_VFY("VFY: invalid sget-1nr of %s.%s (inst=%d actual=%d)\n",
+                    staticField->field.clazz->descriptor,
+                    staticField->field.name, tmpType, fieldType);
+                okay = false;
+                break;
+            }
+
+            setRegisterType(workRegs, insnRegCount, decInsn.vA, tmpType, &okay);
+        }
+        break;
+    case OP_SGET_WIDE:
+        {
+            StaticField* staticField;
+            RegType dstType;
+
+            staticField = getStaticField(meth, decInsn.vB, &okay);
+            if (!okay)
+                break;
+            /* check the type, which should be prim */
+            switch (staticField->field.signature[0]) {
+            case 'D':
+                dstType = kRegTypeDoubleLo;
+                break;
+            case 'J':
+                dstType = kRegTypeLongLo;
+                break;
+            default:
+                LOG_VFY("VFY: invalid sget-wide of %s.%s\n",
+                        staticField->field.clazz->descriptor,
+                        staticField->field.name);
+                dstType = kRegTypeUnknown;
+                okay = false;
+                break;
+            }
+            if (okay) {
+                setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                    dstType, &okay);
+            }
+        }
+        break;
+    case OP_SGET_OBJECT:
+        {
+            StaticField* staticField;
+            ClassObject* fieldClass;
+
+            staticField = getStaticField(meth, decInsn.vB, &okay);
+            if (!okay)
+                break;
+            fieldClass = getFieldClass(meth, &staticField->field);
+            if (fieldClass == NULL) {
+                LOG_VFY("VFY: unable to recover field class from '%s'\n",
+                    staticField->field.signature);
+                okay = false;
+                break;
+            }
+            if (dvmIsPrimitiveClass(fieldClass)) {
+                LOG_VFY("VFY: attempt to get prim field with sget-object\n");
+                okay = false;
+                break;
+            }
+            setRegisterType(workRegs, insnRegCount, decInsn.vA,
+                regTypeFromClass(fieldClass), &okay);
+        }
+        break;
+    case OP_SPUT:
+        tmpType = kRegTypeInteger;
+        goto sput_1nr_common;
+    case OP_SPUT_BOOLEAN:
+        tmpType = kRegTypeBoolean;
+        goto sput_1nr_common;
+    case OP_SPUT_BYTE:
+        tmpType = kRegTypeByte;
+        goto sput_1nr_common;
+    case OP_SPUT_CHAR:
+        tmpType = kRegTypeChar;
+        goto sput_1nr_common;
+    case OP_SPUT_SHORT:
+        tmpType = kRegTypeShort;
+        goto sput_1nr_common;
+sput_1nr_common:
+        /* shared tail: tmpType holds the field type the opcode expects */
+        {
+            RegType srcType, fieldType;
+            StaticField* staticField;
+
+            /* make sure the source register has the correct type */
+            srcType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+                        &okay);
+            if (!canConvertTo1nr(srcType, tmpType)) {
+                LOG_VFY("VFY: invalid reg type %d on sput instr (need %d)\n",
+                    srcType, tmpType);
+                okay = false;
+                break;
+            }
+
+            staticField = getStaticField(meth, decInsn.vB, &okay);
+            if (!okay)
+                break;
+            /* writes to final fields are only allowed from the declaring class */
+            checkFinalFieldAccess(meth, &staticField->field, &okay);
+            if (!okay)
+                break;
+
+            /*
+             * Get type of field we're storing into.  We know that the
+             * contents of the register match the instruction, but we also
+             * need to ensure that the instruction matches the field type.
+             * Using e.g. sput-short to write into a 32-bit integer field
+             * can lead to trouble if we do 16-bit writes.
+             */
+            fieldType = primSigCharToRegType(staticField->field.signature[0]);
+            if (!checkFieldArrayStore1nr(tmpType, fieldType)) {
+                LOG_VFY("VFY: invalid sput-1nr of %s.%s (inst=%d actual=%d)\n",
+                    staticField->field.clazz->descriptor,
+                    staticField->field.name, tmpType, fieldType);
+                okay = false;
+                break;
+            }
+        }
+        break;
+ case OP_SPUT_WIDE:
+ tmpType = getRegisterType(workRegs, insnRegCount, decInsn.vA, &okay);
+ if (okay) {
+ RegType typeHi =
+ getRegisterType(workRegs, insnRegCount, decInsn.vA+1, &okay);
+ checkTypeCategory(tmpType, kTypeCategory2, &okay);
+ checkWidePair(tmpType, typeHi, &okay);
+ }
+ if (okay) {
+ StaticField* staticField;
+
+ staticField = getStaticField(meth, decInsn.vB, &okay);
+ if (!okay)
+ break;
+ checkFinalFieldAccess(meth, &staticField->field, &okay);
+ if (!okay)
+ break;
+
+ /* check the type, which should be prim */
+ switch (staticField->field.signature[0]) {
+ case 'D':
+ case 'J':
+ /* these are okay */
+ break;
+ default:
+ LOG_VFY("VFY: invalid sput-wide of %s.%s\n",
+ staticField->field.clazz->descriptor,
+ staticField->field.name);
+ okay = false;
+ break;
+ }
+ }
+ break;
+ case OP_SPUT_OBJECT:
+ {
+ ClassObject* fieldClass;
+ ClassObject* valueClass;
+ StaticField* staticField;
+ RegType valueType;
+
+ staticField = getStaticField(meth, decInsn.vB, &okay);
+ if (!okay)
+ break;
+ checkFinalFieldAccess(meth, &staticField->field, &okay);
+ if (!okay)
+ break;
+
+ fieldClass = getFieldClass(meth, &staticField->field);
+ if (fieldClass == NULL) {
+ LOG_VFY("VFY: unable to recover field class from '%s'\n",
+ staticField->field.signature);
+ okay = false;
+ break;
+ }
+
+ valueType = getRegisterType(workRegs, insnRegCount, decInsn.vA,
+ &okay);
+ if (!okay)
+ break;
+ if (!regTypeIsReference(valueType)) {
+ LOG_VFY("VFY: storing non-ref v%d into ref field '%s' (%s)\n",
+ decInsn.vA, staticField->field.name,
+ fieldClass->descriptor);
+ okay = false;
+ break;
+ }
+ if (valueType != kRegTypeZero) {
+ valueClass = regTypeInitializedReferenceToClass(valueType);
+ if (valueClass == NULL) {
+ LOG_VFY("VFY: storing uninit ref v%d into ref field\n",
+ decInsn.vA);
+ okay = false;
+ break;
+ }
+ /* allow if field is any interface or field is base class */
+ if (!dvmIsInterfaceClass(fieldClass) &&
+ !dvmInstanceof(valueClass, fieldClass))
+ {
+ LOG_VFY("VFY: storing type '%s' into field type '%s' (%s.%s)\n",
+ valueClass->descriptor, fieldClass->descriptor,
+ staticField->field.clazz->descriptor,
+ staticField->field.name);
+ okay = false;
+ break;
+ }
+ }
+ }
+ break;
+
+ case OP_INVOKE_VIRTUAL:
+ case OP_INVOKE_VIRTUAL_RANGE:
+ case OP_INVOKE_SUPER:
+ case OP_INVOKE_SUPER_RANGE:
+ {
+ Method* calledMethod;
+ RegType returnType;
+ bool isRange;
+ bool isSuper;
+
+ isRange = (decInsn.opCode == OP_INVOKE_VIRTUAL_RANGE ||
+ decInsn.opCode == OP_INVOKE_SUPER_RANGE);
+ isSuper = (decInsn.opCode == OP_INVOKE_SUPER ||
+ decInsn.opCode == OP_INVOKE_SUPER_RANGE);
+
+ calledMethod = verifyInvocationArgs(meth, workRegs, insnRegCount,
+ &decInsn, uninitMap, METHOD_VIRTUAL, isRange,
+ isSuper, &okay);
+ if (!okay)
+ break;
+ returnType = getMethodReturnType(calledMethod);
+ setResultRegisterType(workRegs, insnRegCount, returnType, &okay);
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_DIRECT:
+ case OP_INVOKE_DIRECT_RANGE:
+ {
+ RegType returnType;
+ Method* calledMethod;
+ bool isRange;
+
+ isRange = (decInsn.opCode == OP_INVOKE_DIRECT_RANGE);
+ calledMethod = verifyInvocationArgs(meth, workRegs, insnRegCount,
+ &decInsn, uninitMap, METHOD_DIRECT, isRange,
+ false, &okay);
+ if (!okay)
+ break;
+
+ /*
+ * Some additional checks when calling <init>. We know from
+ * the invocation arg check that the "this" argument is an
+ * instance of calledMethod->clazz. Now we further restrict
+ * that to require that calledMethod->clazz is the same as
+ * this->clazz or this->super, allowing the latter only if
+ * the "this" argument is the same as the "this" argument to
+ * this method (which implies that we're in <init> ourselves).
+ */
+ if (isInitMethod(calledMethod)) {
+ RegType thisType;
+ thisType = getInvocationThis(workRegs, insnRegCount,
+ &decInsn, &okay);
+ if (!okay)
+ break;
+
+ /* no null refs allowed (?) */
+ if (thisType == kRegTypeZero) {
+ LOG_VFY("VFY: unable to initialize null ref\n");
+ okay = false;
+ break;
+ }
+
+ ClassObject* thisClass;
+
+ thisClass = regTypeReferenceToClass(thisType, uninitMap);
+ assert(thisClass != NULL);
+
+ /* must be in same class or in superclass */
+ if (calledMethod->clazz == thisClass->super) {
+ if (thisClass != meth->clazz) {
+ LOG_VFY("VFY: invoke-direct <init> on super only "
+ "allowed for 'this' in <init>");
+ okay = false;
+ break;
+ }
+ } else if (calledMethod->clazz != thisClass) {
+ LOG_VFY("VFY: invoke-direct <init> must be on current "
+ "class or super\n");
+ okay = false;
+ break;
+ }
+
+ /* arg must be an uninitialized reference */
+ if (!regTypeIsUninitReference(thisType)) {
+ LOG_VFY("VFY: can only initialize the uninitialized\n");
+ okay = false;
+ break;
+ }
+
+ /*
+ * Replace the uninitialized reference with an initialized
+ * one, and clear the entry in the uninit map. We need to
+ * do this for all registers that have the same object
+ * instance in them, not just the "this" register.
+ */
+ int uidx = regTypeToUninitIndex(thisType);
+ markRefsAsInitialized(workRegs, insnRegCount, uninitMap,
+ thisType, &okay);
+ if (!okay)
+ break;
+ }
+ returnType = getMethodReturnType(calledMethod);
+ setResultRegisterType(workRegs, insnRegCount,
+ returnType, &okay);
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_STATIC:
+ case OP_INVOKE_STATIC_RANGE:
+ {
+ RegType returnType;
+ Method* calledMethod;
+ bool isRange;
+
+ isRange = (decInsn.opCode == OP_INVOKE_STATIC_RANGE);
+ calledMethod = verifyInvocationArgs(meth, workRegs, insnRegCount,
+ &decInsn, uninitMap, METHOD_STATIC, isRange,
+ false, &okay);
+ if (!okay)
+ break;
+
+ returnType = getMethodReturnType(calledMethod);
+ setResultRegisterType(workRegs, insnRegCount, returnType, &okay);
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_INTERFACE:
+ case OP_INVOKE_INTERFACE_RANGE:
+ {
+ RegType /*thisType,*/ returnType;
+ Method* absMethod;
+ bool isRange;
+
+ isRange = (decInsn.opCode == OP_INVOKE_INTERFACE_RANGE);
+ absMethod = verifyInvocationArgs(meth, workRegs, insnRegCount,
+ &decInsn, uninitMap, METHOD_INTERFACE, isRange,
+ false, &okay);
+ if (!okay)
+ break;
+
+#if 0 /* can't do this here, fails on dalvik test 052-verifier-fun */
+ /*
+ * Get the type of the "this" arg, which should always be an
+ * interface class. Because we don't do a full merge on
+ * interface classes, this might have reduced to Object.
+ */
+ thisType = getInvocationThis(workRegs, insnRegCount,
+ &decInsn, &okay);
+ if (!okay)
+ break;
+
+ if (thisType == kRegTypeZero) {
+ /* null pointer always passes (and always fails at runtime) */
+ } else {
+ ClassObject* thisClass;
+
+ thisClass = regTypeInitializedReferenceToClass(thisType);
+ if (thisClass == NULL) {
+ LOG_VFY("VFY: interface call on uninitialized\n");
+ okay = false;
+ break;
+ }
+
+ /*
+ * Either "thisClass" needs to be the interface class that
+ * defined absMethod, or absMethod's class needs to be one
+ * of the interfaces implemented by "thisClass". (Or, if
+ * we couldn't complete the merge, this will be Object.)
+ */
+ if (thisClass != absMethod->clazz &&
+ thisClass != gDvm.classJavaLangObject &&
+ !dvmImplements(thisClass, absMethod->clazz))
+ {
+ LOG_VFY("VFY: unable to match absMethod '%s' with %s interfaces\n",
+ absMethod->name, thisClass->descriptor);
+ okay = false;
+ break;
+ }
+ }
+#endif
+
+ /*
+ * We don't have an object instance, so we can't find the
+ * concrete method. However, all of the type information is
+ * in the abstract method, so we're good.
+ */
+ returnType = getMethodReturnType(absMethod);
+ setResultRegisterType(workRegs, insnRegCount, returnType, &okay);
+ justSetResult = true;
+ }
+ break;
+
+ case OP_NEG_INT:
+ case OP_NOT_INT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, &okay);
+ break;
+ case OP_NEG_LONG:
+ case OP_NOT_LONG:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeLongLo, &okay);
+ break;
+ case OP_NEG_FLOAT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeFloat, &okay);
+ break;
+ case OP_NEG_DOUBLE:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeDoubleLo, &okay);
+ break;
+ case OP_INT_TO_LONG:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeInteger, &okay);
+ break;
+ case OP_INT_TO_FLOAT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeInteger, &okay);
+ break;
+ case OP_INT_TO_DOUBLE:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeInteger, &okay);
+ break;
+ case OP_LONG_TO_INT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeLongLo, &okay);
+ break;
+ case OP_LONG_TO_FLOAT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeLongLo, &okay);
+ break;
+ case OP_LONG_TO_DOUBLE:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeLongLo, &okay);
+ break;
+ case OP_FLOAT_TO_INT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeFloat, &okay);
+ break;
+ case OP_FLOAT_TO_LONG:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeFloat, &okay);
+ break;
+ case OP_FLOAT_TO_DOUBLE:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeFloat, &okay);
+ break;
+ case OP_DOUBLE_TO_INT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeDoubleLo, &okay);
+ break;
+ case OP_DOUBLE_TO_LONG:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeDoubleLo, &okay);
+ break;
+ case OP_DOUBLE_TO_FLOAT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeDoubleLo, &okay);
+ break;
+ case OP_INT_TO_BYTE:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeByte, kRegTypeInteger, &okay);
+ break;
+ case OP_INT_TO_CHAR:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeChar, kRegTypeInteger, &okay);
+ break;
+ case OP_INT_TO_SHORT:
+ checkUnop(workRegs, insnRegCount, &decInsn,
+ kRegTypeShort, kRegTypeInteger, &okay);
+ break;
+
+ case OP_ADD_INT:
+ case OP_SUB_INT:
+ case OP_MUL_INT:
+ case OP_REM_INT:
+ case OP_DIV_INT:
+ case OP_SHL_INT:
+ case OP_SHR_INT:
+ case OP_USHR_INT:
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, kRegTypeInteger, false, &okay);
+ break;
+ case OP_AND_INT:
+ case OP_OR_INT:
+ case OP_XOR_INT:
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, kRegTypeInteger, true, &okay);
+ break;
+ case OP_ADD_LONG:
+ case OP_SUB_LONG:
+ case OP_MUL_LONG:
+ case OP_DIV_LONG:
+ case OP_REM_LONG:
+ case OP_AND_LONG:
+ case OP_OR_LONG:
+ case OP_XOR_LONG:
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeLongLo, kRegTypeLongLo, false, &okay);
+ break;
+ case OP_SHL_LONG:
+ case OP_SHR_LONG:
+ case OP_USHR_LONG:
+ /* shift distance is Int, making these different from other binops */
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeLongLo, kRegTypeInteger, false, &okay);
+ break;
+ case OP_ADD_FLOAT:
+ case OP_SUB_FLOAT:
+ case OP_MUL_FLOAT:
+ case OP_DIV_FLOAT:
+ case OP_REM_FLOAT:
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeFloat, kRegTypeFloat, false, &okay);
+ break;
+ case OP_ADD_DOUBLE:
+ case OP_SUB_DOUBLE:
+ case OP_MUL_DOUBLE:
+ case OP_DIV_DOUBLE:
+ case OP_REM_DOUBLE:
+ checkBinop(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeDoubleLo, kRegTypeDoubleLo, false, &okay);
+ break;
+ case OP_ADD_INT_2ADDR:
+ case OP_SUB_INT_2ADDR:
+ case OP_MUL_INT_2ADDR:
+ case OP_REM_INT_2ADDR:
+ case OP_SHL_INT_2ADDR:
+ case OP_SHR_INT_2ADDR:
+ case OP_USHR_INT_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, kRegTypeInteger, false, &okay);
+ break;
+ case OP_AND_INT_2ADDR:
+ case OP_OR_INT_2ADDR:
+ case OP_XOR_INT_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, kRegTypeInteger, true, &okay);
+ break;
+ case OP_DIV_INT_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, kRegTypeInteger, false, &okay);
+ break;
+ case OP_ADD_LONG_2ADDR:
+ case OP_SUB_LONG_2ADDR:
+ case OP_MUL_LONG_2ADDR:
+ case OP_DIV_LONG_2ADDR:
+ case OP_REM_LONG_2ADDR:
+ case OP_AND_LONG_2ADDR:
+ case OP_OR_LONG_2ADDR:
+ case OP_XOR_LONG_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeLongLo, kRegTypeLongLo, false, &okay);
+ break;
+ case OP_SHL_LONG_2ADDR:
+ case OP_SHR_LONG_2ADDR:
+ case OP_USHR_LONG_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeLongLo, kRegTypeLongLo, kRegTypeInteger, false, &okay);
+ break;
+ case OP_ADD_FLOAT_2ADDR:
+ case OP_SUB_FLOAT_2ADDR:
+ case OP_MUL_FLOAT_2ADDR:
+ case OP_DIV_FLOAT_2ADDR:
+ case OP_REM_FLOAT_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeFloat, kRegTypeFloat, kRegTypeFloat, false, &okay);
+ break;
+ case OP_ADD_DOUBLE_2ADDR:
+ case OP_SUB_DOUBLE_2ADDR:
+ case OP_MUL_DOUBLE_2ADDR:
+ case OP_DIV_DOUBLE_2ADDR:
+ case OP_REM_DOUBLE_2ADDR:
+ checkBinop2addr(workRegs, insnRegCount, &decInsn,
+ kRegTypeDoubleLo, kRegTypeDoubleLo, kRegTypeDoubleLo, false, &okay);
+ break;
+ case OP_ADD_INT_LIT16:
+ case OP_RSUB_INT:
+ case OP_MUL_INT_LIT16:
+ case OP_DIV_INT_LIT16:
+ case OP_REM_INT_LIT16:
+ checkLitop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, false, &okay);
+ break;
+ case OP_AND_INT_LIT16:
+ case OP_OR_INT_LIT16:
+ case OP_XOR_INT_LIT16:
+ checkLitop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, true, &okay);
+ break;
+ case OP_ADD_INT_LIT8:
+ case OP_RSUB_INT_LIT8:
+ case OP_MUL_INT_LIT8:
+ case OP_DIV_INT_LIT8:
+ case OP_REM_INT_LIT8:
+ case OP_SHL_INT_LIT8:
+ case OP_SHR_INT_LIT8:
+ case OP_USHR_INT_LIT8:
+ checkLitop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, false, &okay);
+ break;
+ case OP_AND_INT_LIT8:
+ case OP_OR_INT_LIT8:
+ case OP_XOR_INT_LIT8:
+ checkLitop(workRegs, insnRegCount, &decInsn,
+ kRegTypeInteger, kRegTypeInteger, true, &okay);
+ break;
+
+
+ /*
+ * Verifying "quickened" instructions is tricky, because we have
+ * discarded the original field/method information. The byte offsets
+ * and vtable indices only have meaning in the context of an object
+ * instance.
+ *
+ * If a piece of code declares a local reference variable, assigns
+ * null to it, and then issues a virtual method call on it, we
+ * cannot evaluate the method call during verification. This situation
+ * isn't hard to handle, since we know the call will always result in an
+ * NPE, and the arguments and return value don't matter. Any code that
+ * depends on the result of the method call is inaccessible, so the
+ * fact that we can't fully verify anything that comes after the bad
+ * call is not a problem.
+ *
+ * We must also consider the case of multiple code paths, only some of
+ * which involve a null reference. We can completely verify the method
+ * if we sidestep the results of executing with a null reference.
+ * For example, if on the first pass through the code we try to do a
+ * virtual method invocation through a null ref, we have to skip the
+ * method checks and have the method return a "wildcard" type (which
+ * merges with anything to become that other thing). The move-result
+ * will tell us if it's a reference, single-word numeric, or double-word
+ * value. We continue to perform the verification, and at the end of
+ * the function any invocations that were never fully exercised are
+ * marked as null-only.
+ *
+ * We would do something similar for the field accesses. The field's
+ * type, once known, can be used to recover the width of short integers.
+ * If the object reference was null, the field-get returns the "wildcard"
+ * type, which is acceptable for any operation.
+ */
+ case OP_EXECUTE_INLINE:
+ case OP_INVOKE_DIRECT_EMPTY:
+ case OP_IGET_QUICK:
+ case OP_IGET_WIDE_QUICK:
+ case OP_IGET_OBJECT_QUICK:
+ case OP_IPUT_QUICK:
+ case OP_IPUT_WIDE_QUICK:
+ case OP_IPUT_OBJECT_QUICK:
+ case OP_INVOKE_VIRTUAL_QUICK:
+ case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+ case OP_INVOKE_SUPER_QUICK:
+ case OP_INVOKE_SUPER_QUICK_RANGE:
+ okay = false;
+ break;
+
+ /* these should never appear */
+ case OP_UNUSED_3E:
+ case OP_UNUSED_3F:
+ case OP_UNUSED_40:
+ case OP_UNUSED_41:
+ case OP_UNUSED_42:
+ case OP_UNUSED_43:
+ case OP_UNUSED_73:
+ case OP_UNUSED_79:
+ case OP_UNUSED_7A:
+ case OP_UNUSED_E3:
+ case OP_UNUSED_E4:
+ case OP_UNUSED_E5:
+ case OP_UNUSED_E6:
+ case OP_UNUSED_E7:
+ case OP_UNUSED_E8:
+ case OP_UNUSED_E9:
+ case OP_UNUSED_EA:
+ case OP_UNUSED_EB:
+ case OP_UNUSED_EC:
+ case OP_UNUSED_ED:
+ case OP_UNUSED_EF:
+ case OP_UNUSED_F1:
+ case OP_UNUSED_FC:
+ case OP_UNUSED_FD:
+ case OP_UNUSED_FE:
+ case OP_UNUSED_FF:
+ okay = false;
+ break;
+
+ /*
+ * DO NOT add a "default" clause here. Without it the compiler will
+ * complain if an instruction is missing (which is desirable).
+ */
+ }
+
+ if (!okay) {
+ LOG_VFY_METH(meth, "VFY: rejecting opcode 0x%02x at 0x%04x\n",
+ decInsn.opCode, insnIdx);
+ goto bail;
+ }
+
+ /*
+ * If we didn't just set the result register, clear it out. This
+ * ensures that you can only use "move-result" immediately after the
+ * result is set.
+ */
+ if (!justSetResult) {
+ int reg = RESULT_REGISTER(insnRegCount);
+ workRegs[reg] = workRegs[reg+1] = kRegTypeUnknown;
+ }
+
+ /*
+ * Handle "continue". Tag the next consecutive instruction.
+ */
+ if ((nextFlags & kInstrCanContinue) != 0) {
+ int insnWidth = dvmInsnGetWidth(insnFlags, insnIdx);
+ if (insnIdx+insnWidth >= insnsSize) {
+ LOG_VFY_METH(meth,
+ "VFY: execution can walk off end of code area (from 0x%x)\n",
+ insnIdx);
+ goto bail;
+ }
+
+ /*
+ * The only way to get to a move-exception instruction is to get
+ * thrown there. Make sure the next instruction isn't one.
+ */
+ if (!checkMoveException(meth, insnIdx+insnWidth, "next"))
+ goto bail;
+
+ /*
+ * We want to update the registers and set the "changed" flag on the
+ * next instruction (if necessary). We may not be storing register
+ * changes for all addresses, so for non-branch targets we just
+ * compare "entry" vs. "work" to see if we've changed anything.
+ */
+ if (getRegisterLine(regTable, insnIdx+insnWidth) != NULL) {
+ updateRegisters(meth, insnFlags, regTable, insnIdx+insnWidth,
+ workRegs);
+ } else {
+ /* if not yet visited, or regs were updated, set "changed" */
+ if (!dvmInsnIsVisited(insnFlags, insnIdx+insnWidth) ||
+ compareRegisters(workRegs, entryRegs,
+ insnRegCount + kExtraRegs) != 0)
+ {
+ dvmInsnSetChanged(insnFlags, insnIdx+insnWidth, true);
+ }
+ }
+ }
+
+ /*
+ * Handle "branch". Tag the branch target.
+ *
+ * NOTE: instructions like OP_EQZ provide information about the state
+ * of the register when the branch is taken or not taken. For example,
+ * somebody could get a reference field, check it for zero, and if the
+ * branch is taken immediately store that register in a boolean field
+ * since the value is known to be zero. We do not currently account for
+ * that, and will reject the code.
+ */
+ if ((nextFlags & kInstrCanBranch) != 0) {
+ bool isConditional;
+
+ if (!dvmGetBranchTarget(meth, insnFlags, insnIdx, &branchTarget,
+ &isConditional))
+ {
+ /* should never happen after static verification */
+ LOG_VFY_METH(meth, "VFY: bad branch at %d\n", insnIdx);
+ goto bail;
+ }
+ assert(isConditional || (nextFlags & kInstrCanContinue) == 0);
+ assert(!isConditional || (nextFlags & kInstrCanContinue) != 0);
+
+ if (!checkMoveException(meth, insnIdx+branchTarget, "branch"))
+ goto bail;
+
+ updateRegisters(meth, insnFlags, regTable, insnIdx+branchTarget,
+ workRegs);
+ }
+
+ /*
+ * Handle "switch". Tag all possible branch targets.
+ *
+ * We've already verified that the table is structurally sound, so we
+ * just need to walk through and tag the targets.
+ */
+ if ((nextFlags & kInstrCanSwitch) != 0) {
+ int offsetToSwitch = insns[1] | (((s4)insns[2]) << 16);
+ const u2* switchInsns = insns + offsetToSwitch;
+ int switchCount = switchInsns[1];
+ int offsetToTargets, targ;
+
+ if ((*insns & 0xff) == OP_PACKED_SWITCH) {
+ /* 0=sig, 1=count, 2/3=firstKey */
+ offsetToTargets = 4;
+ } else {
+ /* 0=sig, 1=count, 2..count*2 = keys */
+ assert((*insns & 0xff) == OP_SPARSE_SWITCH);
+ offsetToTargets = 2 + 2*switchCount;
+ }
+
+ /* verify each switch target */
+ for (targ = 0; targ < switchCount; targ++) {
+ int offset, absOffset;
+
+ /* offsets are 32-bit, and only partly endian-swapped */
+ offset = switchInsns[offsetToTargets + targ*2] |
+ (((s4) switchInsns[offsetToTargets + targ*2 +1]) << 16);
+ absOffset = insnIdx + offset;
+
+ assert(absOffset >= 0 && absOffset < insnsSize);
+
+ if (!checkMoveException(meth, absOffset, "switch"))
+ goto bail;
+
+ updateRegisters(meth, insnFlags, regTable, absOffset, workRegs);
+ }
+ }
+
+ /*
+ * Handle instructions that can throw and that are sitting in a
+ * "try" block. (If they're not in a "try" block when they throw,
+ * control transfers out of the method.)
+ */
+ if ((nextFlags & kInstrCanThrow) != 0 && dvmInsnIsInTry(insnFlags, insnIdx))
+ {
+ DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+ const DexCode* pCode = dvmGetMethodCode(meth);
+ DexCatchIterator iterator;
+
+ if (dexFindCatchHandler(&iterator, pCode, insnIdx)) {
+ for (;;) {
+ DexCatchHandler* handler = dexCatchIteratorNext(&iterator);
+
+ if (handler == NULL) {
+ break;
+ }
+
+ /* note we use entryRegs, not workRegs */
+ updateRegisters(meth, insnFlags, regTable, handler->address,
+ entryRegs);
+ }
+ }
+ }
+
+ /*
+     * Update startGuess.  Advance to the next instruction if that's
+ * possible, otherwise use the branch target if one was found. If
+ * neither of those exists we're in a return or throw; leave startGuess
+ * alone and let the caller sort it out.
+ */
+ if ((nextFlags & kInstrCanContinue) != 0) {
+ *pStartGuess = insnIdx + dvmInsnGetWidth(insnFlags, insnIdx);
+ } else if ((nextFlags & kInstrCanBranch) != 0) {
+ /* we're still okay if branchTarget is zero */
+ *pStartGuess = insnIdx + branchTarget;
+ }
+
+ assert(*pStartGuess >= 0 && *pStartGuess < insnsSize &&
+ dvmInsnGetWidth(insnFlags, *pStartGuess) != 0);
+
+ result = true;
+
+bail:
+ return result;
+}
+
+/*
+ * callback function used in dumpRegTypes to print local vars
+ * valid at a given address.
+ */
+static void logLocalsCb(void *cnxt, u2 reg, u4 startAddress, u4 endAddress,
+    const char *name, const char *descriptor,
+    const char *signature)
+{
+    int curAddr = *((int *)cnxt);
+
+    /* only log variables whose live range covers the address of interest */
+    if (curAddr < (int) startAddress || curAddr >= (int) endAddress)
+        return;
+
+    LOGI(" %2d: '%s' %s\n", reg, name, descriptor);
+}
+
+/*
+ * Dump the register types for the specified address to the log file.
+ */
+static void dumpRegTypes(const Method* meth, const InsnFlags* insnFlags,
+    const RegType* addrRegs, int addr, const char* addrName,
+    const UninitInstanceMap* uninitMap, int displayFlags)
+{
+    int regCount = meth->registersSize;
+    int fullRegCount = regCount + kExtraRegs;
+    bool branchTarget = dvmInsnIsBranchTarget(insnFlags, addr);
+    int i;
+
+    assert(addr >= 0 && addr < (int) dvmGetMethodInsnsSize(meth));
+
+    /*
+     * Build a one-character-per-register summary string.  The layout adds
+     * one spacer per group of four registers (the "(x-1)/4" terms), a
+     * two-char gap before the result-register slots, plus '[', ']' and
+     * the terminating NUL.
+     */
+    int regCharSize = fullRegCount + (fullRegCount-1)/4 + 2 +1;
+    char regChars[regCharSize +1];
+    memset(regChars, ' ', regCharSize);
+    regChars[0] = '[';
+    if (regCount == 0)
+        regChars[1] = ']';
+    else
+        regChars[1 + (regCount-1) + (regCount-1)/4 +1] = ']';
+    regChars[regCharSize] = '\0';
+
+    //const RegType* addrRegs = getRegisterLine(regTable, addr);
+
+    /* map each register's type to a single display character */
+    for (i = 0; i < regCount + kExtraRegs; i++) {
+        char tch;
+
+        switch (addrRegs[i]) {
+        case kRegTypeUnknown: tch = '.'; break;
+        case kRegTypeConflict: tch = 'X'; break;
+        case kRegTypeFloat: tch = 'F'; break;
+        case kRegTypeZero: tch = '0'; break;
+        case kRegTypeOne: tch = '1'; break;
+        case kRegTypeBoolean: tch = 'Z'; break;
+        case kRegTypePosByte: tch = 'b'; break;
+        case kRegTypeByte: tch = 'B'; break;
+        case kRegTypePosShort: tch = 's'; break;
+        case kRegTypeShort: tch = 'S'; break;
+        case kRegTypeChar: tch = 'C'; break;
+        case kRegTypeInteger: tch = 'I'; break;
+        case kRegTypeLongLo: tch = 'J'; break;
+        case kRegTypeLongHi: tch = 'j'; break;
+        case kRegTypeDoubleLo: tch = 'D'; break;
+        case kRegTypeDoubleHi: tch = 'd'; break;
+        default:
+            /* anything not in the enum is a reference (initialized or not) */
+            if (regTypeIsReference(addrRegs[i])) {
+                if (regTypeIsUninitReference(addrRegs[i]))
+                    tch = 'U';
+                else
+                    tch = 'L';
+            } else {
+                tch = '*';
+                assert(false);
+            }
+            break;
+        }
+
+        /* result-register slots land after the two-char gap */
+        if (i < regCount)
+            regChars[1 + i + (i/4)] = tch;
+        else
+            regChars[1 + i + (i/4) + 2] = tch;
+    }
+
+    /* '>' marks branch targets; addrName (if any) replaces addr zero */
+    if (addr == 0 && addrName != NULL)
+        LOGI("%c%s %s\n", branchTarget ? '>' : ' ', addrName, regChars);
+    else
+        LOGI("%c0x%04x %s\n", branchTarget ? '>' : ' ', addr, regChars);
+
+    if (displayFlags & DRT_SHOW_REF_TYPES) {
+        /* second pass: print the class for every non-null reference reg */
+        for (i = 0; i < regCount + kExtraRegs; i++) {
+            if (regTypeIsReference(addrRegs[i]) && addrRegs[i] != kRegTypeZero)
+            {
+                ClassObject* clazz;
+
+                clazz = regTypeReferenceToClass(addrRegs[i], uninitMap);
+                assert(dvmValidateObject((Object*)clazz));
+                if (i < regCount) {
+                    LOGI(" %2d: 0x%08x %s%s\n",
+                        i, addrRegs[i],
+                        regTypeIsUninitReference(addrRegs[i]) ? "[U]" : "",
+                        clazz->descriptor);
+                } else {
+                    /* "RS" == result-register slot */
+                    LOGI(" RS: 0x%08x %s%s\n",
+                        addrRegs[i],
+                        regTypeIsUninitReference(addrRegs[i]) ? "[U]" : "",
+                        clazz->descriptor);
+                }
+            }
+        }
+    }
+    if (displayFlags & DRT_SHOW_LOCALS) {
+        /* walk the debug info; logLocalsCb filters by "addr" */
+        dexDecodeDebugInfo(meth->clazz->pDvmDex->pDexFile,
+                dvmGetMethodCode(meth),
+                meth->clazz->descriptor,
+                meth->prototype.protoIdx,
+                meth->accessFlags,
+                NULL, logLocalsCb, &addr);
+    }
+}
+
diff --git a/vm/analysis/CodeVerify.h b/vm/analysis/CodeVerify.h
new file mode 100644
index 0000000..0cd4638
--- /dev/null
+++ b/vm/analysis/CodeVerify.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik bytecode verifier.
+ */
+#ifndef _DALVIK_CODEVERIFY
+#define _DALVIK_CODEVERIFY
+
+#include "analysis/VerifySubs.h"
+
+
+/*
+ * Enumeration for register type values. The "hi" piece of a 64-bit value
+ * MUST immediately follow the "lo" piece in the enumeration, so we can check
+ * that hi==lo+1.
+ *
+ * Assignment of constants:
+ * [-MAXINT,-32768) : integer
+ * [-32768,-128) : short
+ * [-128,0) : byte
+ * 0 : zero
+ * 1 : one
+ * [2,128) : posbyte
+ * [128,32768) : posshort
+ * [32768,65536) : char
+ * [65536,MAXINT] : integer
+ *
+ * Allowed "implicit" widening conversions:
+ * zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
+ * one -> boolean, posbyte, byte, posshort, short, char, integer
+ * boolean -> posbyte, byte, posshort, short, char, integer
+ * posbyte -> posshort, short, integer, char
+ * byte -> short, integer
+ * posshort -> integer, char
+ * short -> integer
+ * char -> integer
+ *
+ * In addition, all of the above can convert to "float".
+ *
+ * We're more careful with integer values than the spec requires. The
+ * motivation is to restrict byte/char/short to the correct range of values.
+ * For example, if a method takes a byte argument, we don't want to allow
+ * the code to load the constant "1024" and pass it in.
+ */
+enum {
+    kRegTypeUnknown = 0,    /* initial state; use value=0 so calloc works */
+    kRegTypeUninit = 1,     /* MUST be odd to distinguish from pointer */
+    kRegTypeConflict,       /* merge clash makes this reg's type unknowable */
+
+    /*
+     * Category-1nr types. The order of these is chiseled into a couple
+     * of tables, so don't add, remove, or reorder if you can avoid it.
+     */
+#define kRegType1nrSTART kRegTypeFloat
+    kRegTypeFloat,          /* 32-bit float; all 1nr types can convert to it */
+    kRegTypeZero,           /* 32-bit 0, could be Boolean, Int, Float, or Ref */
+    kRegTypeOne,            /* 32-bit 1, could be Boolean, Int, Float */
+    kRegTypeBoolean,        /* must be 0 or 1 */
+    kRegTypePosByte,        /* byte, known positive (can become char) */
+    kRegTypeByte,           /* byte, full signed range */
+    kRegTypePosShort,       /* short, known positive (can become char) */
+    kRegTypeShort,          /* short, full signed range */
+    kRegTypeChar,           /* 16-bit unsigned char range */
+    kRegTypeInteger,        /* full 32-bit int */
+#define kRegType1nrEND kRegTypeInteger
+
+    kRegTypeLongLo,         /* lower-numbered register; endian-independent */
+    kRegTypeLongHi,         /* upper half; MUST be kRegTypeLongLo+1 */
+    kRegTypeDoubleLo,       /* lower half of a 64-bit double */
+    kRegTypeDoubleHi,       /* upper half; MUST be kRegTypeDoubleLo+1 */
+
+    /*
+     * Enumeration max; this is used with "full" (32-bit) RegType values.
+     *
+     * Anything larger than this is a ClassObject or uninit ref. Mask off
+     * all but the low 8 bits; if you're left with kRegTypeUninit, pull
+     * the uninit index out of the high 24. Because kRegTypeUninit has an
+     * odd value, there is no risk of a particular ClassObject pointer bit
+     * pattern being confused for it (assuming our class object allocator
+     * uses word alignment).
+     */
+    kRegTypeMAX
+};
+#define kRegTypeUninitMask 0xff
+#define kRegTypeUninitShift 8
+
+/*
+ * RegType holds information about the type of data held in a register.
+ * For most types it's a simple enum. For reference types it holds a
+ * pointer to the ClassObject, and for uninitialized references it holds
+ * an index into the UninitInstanceMap.
+ */
+typedef u4 RegType;
+
+/* table with merge logic for primitive types */
+extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
+
+
+/*
+ * Returns "true" if the flags indicate that this address holds the start
+ * of an instruction.
+ */
+INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
+    /* a zero width means no instruction starts at this address */
+    int width = insnFlags[addr] & kInsnFlagWidthMask;
+    return width != 0;
+}
+
+/*
+ * Extract the unsigned 16-bit instruction width from "flags".
+ */
+INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
+    /* width lives in the low bits of the per-instruction flags word */
+    int width = insnFlags[addr] & kInsnFlagWidthMask;
+    return width;
+}
+
+/*
+ * Changed?
+ */
+INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
+    bool isSet = (insnFlags[addr] & kInsnFlagChanged) != 0;
+    return isSet;
+}
+/* set or clear the "changed" flag for the instruction at "addr" */
+INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
+{
+    insnFlags[addr] = changed ? (insnFlags[addr] | kInsnFlagChanged)
+                              : (insnFlags[addr] & ~kInsnFlagChanged);
+}
+
+/*
+ * Visited?
+ */
+INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
+    return (insnFlags[addr] & kInsnFlagVisited) != 0;
+}
+/*
+ * Set or clear the "visited" flag for the instruction at "addr".
+ * (Parameter renamed from the copy-pasted "changed" -- it controls the
+ * visited flag, not the changed flag.)
+ */
+INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool visited)
+{
+    if (visited)
+        insnFlags[addr] |= kInsnFlagVisited;
+    else
+        insnFlags[addr] &= ~kInsnFlagVisited;
+}
+
+/*
+ * Visited or changed?
+ */
+INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
+    /* equivalent to testing both flag bits in a single mask */
+    return dvmInsnIsVisited(insnFlags, addr) ||
+           dvmInsnIsChanged(insnFlags, addr);
+}
+
+/*
+ * In a "try" block?
+ */
+INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
+    bool inTry = (insnFlags[addr] & kInsnFlagInTry) != 0;
+    return inTry;
+}
+INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
+{
+    /* this flag is only ever set, never cleared */
+    assert(inTry);
+    insnFlags[addr] |= kInsnFlagInTry;
+}
+
+/*
+ * Instruction is a branch target or exception handler?
+ */
+INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
+    bool isTarget = (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
+    return isTarget;
+}
+INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
+    bool isBranch)
+{
+    /* this flag is only ever set, never cleared */
+    assert(isBranch);
+    insnFlags[addr] |= kInsnFlagBranchTarget;
+}
+
+/*
+ * Instruction is a GC point?
+ */
+INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
+    return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
+}
+/*
+ * Mark the instruction at "addr" as a GC point.  The flag is only ever
+ * set, never cleared.  (Parameter renamed from the copy-pasted
+ * "isBranch" -- it controls the GC-point flag.)
+ */
+INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
+    bool isGcPoint)
+{
+    assert(isGcPoint);
+    insnFlags[addr] |= kInsnFlagGcPoint;
+}
+
+
+/*
+ * Table that maps uninitialized instances to classes, based on the
+ * address of the new-instance instruction.
+ */
+typedef struct UninitInstanceMap {
+    int numEntries;             /* number of elements in map[] */
+    struct {
+        int addr;               /* code offset, or -1 for method arg ("this") */
+        ClassObject* clazz;     /* class created at this address */
+    } map[1];                   /* "struct hack": actually numEntries long */
+} UninitInstanceMap;
+#define kUninitThisArgAddr (-1)
+#define kUninitThisArgSlot 0
+
+/*
+ * Create a new UninitInstanceMap.
+ */
+UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
+ const InsnFlags* insnFlags, int newInstanceCount);
+
+/*
+ * Release the storage associated with an UninitInstanceMap.
+ */
+void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
+
+/*
+ * Associate a class with an address. Returns the map slot index, or -1
+ * if the address isn't listed in the map (shouldn't happen) or if a
+ * different class is already associated with the address (shouldn't
+ * happen either).
+ */
+//int dvmSetUninitInstance(UninitInstanceMap* uninitMap, int addr,
+// ClassObject* clazz);
+
+/*
+ * Return the class associated with an uninitialized reference. Pass in
+ * the map index.
+ */
+//ClassObject* dvmGetUninitInstance(const UninitInstanceMap* uninitMap, int idx);
+
+/*
+ * Clear the class associated with an uninitialized reference. Pass in
+ * the map index.
+ */
+//void dvmClearUninitInstance(UninitInstanceMap* uninitMap, int idx);
+
+
+/*
+ * Verify bytecode in "meth". "insnFlags" should be populated with
+ * instruction widths and "in try" flags.
+ */
+bool dvmVerifyCodeFlow(const Method* meth, InsnFlags* insnFlags,
+ UninitInstanceMap* uninitMap);
+
+#endif /*_DALVIK_CODEVERIFY*/
diff --git a/vm/analysis/DexOptimize.c b/vm/analysis/DexOptimize.c
new file mode 100644
index 0000000..d086b99
--- /dev/null
+++ b/vm/analysis/DexOptimize.c
@@ -0,0 +1,2152 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Convert the output from "dx" into a locally-optimized DEX file.
+ *
+ * TODO: the format of the optimized header is currently "whatever we
+ * happen to write", since the VM that writes it is by definition the same
+ * as the VM that reads it. Still, it should be better documented and
+ * more rigorously structured.
+ */
+#include "Dalvik.h"
+#include "libdex/InstrUtils.h"
+#include "libdex/OptInvocation.h"
+
+#include <zlib.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/*
+ * Virtual/direct calls to "method" are replaced with an execute-inline
+ * instruction with index "idx".
+ */
+typedef struct InlineSub {
+    Method* method;     /* method whose invocations get rewritten */
+    int inlineIdx;      /* index used by the execute-inline instruction */
+} InlineSub;
+
+
+/* fwd */
+static int writeDependencies(int fd, u4 modWhen, u4 crc);
+static bool writeAuxData(int fd, const DexClassLookup* pClassLookup,
+    const IndexMapSet* pIndexMapSet);
+static void logFailedWrite(size_t expected, ssize_t actual, const char* msg,
+    int err);
+
+static bool rewriteDex(u1* addr, int len, bool doVerify, bool doOpt,
+    u4* pHeaderFlags, DexClassLookup** ppClassLookup);
+static void updateChecksum(u1* addr, int len, DexHeader* pHeader);
+static bool loadAllClasses(DvmDex* pDvmDex);
+static void optimizeLoadedClasses(DexFile* pDexFile);
+static void optimizeClass(ClassObject* clazz, const InlineSub* inlineSubs);
+static bool optimizeMethod(Method* method, const InlineSub* inlineSubs);
+static void rewriteInstField(Method* method, u2* insns, OpCode newOpc);
+static bool rewriteVirtualInvoke(Method* method, u2* insns, OpCode newOpc);
+static bool rewriteDirectInvoke(Method* method, u2* insns);
+static bool rewriteExecuteInline(Method* method, u2* insns,
+    MethodType methodType, const InlineSub* inlineSubs);
+
+
+/*
+ * Return the fd of an open file in the DEX file cache area. If the cache
+ * file doesn't exist or is out of date, this will remove the old entry,
+ * create a new one (writing only the file header), and return with the
+ * "new file" flag set.
+ *
+ * It's possible to execute from an unoptimized DEX file directly,
+ * assuming the byte ordering and structure alignment is correct, but
+ * disadvantageous because some significant optimizations are not possible.
+ * It's not generally possible to do the same from an uncompressed Jar
+ * file entry, because we have to guarantee 32-bit alignment in the
+ * memory-mapped file.
+ *
+ * For a Jar/APK file (a zip archive with "classes.dex" inside), "modWhen"
+ * and "crc32" come from the Zip directory entry. For a stand-alone DEX
+ * file, it's the modification date of the file and the Adler32 from the
+ * DEX header (which immediately follows the magic). If these don't
+ * match what's stored in the opt header, we reject the file immediately.
+ *
+ * On success, the file descriptor will be positioned just past the "opt"
+ * file header, and will be locked with flock. "*pCachedName" will point
+ * to newly-allocated storage.
+ */
+int dvmOpenCachedDexFile(const char* fileName, const char* cacheFileName,
+    u4 modWhen, u4 crc, bool isBootstrap, bool* pNewFile, bool createIfMissing)
+{
+    int fd, cc;
+    struct stat fdStat, fileStat;
+    bool readOnly = false;
+
+    *pNewFile = false;
+
+retry:
+    /*
+     * Try to open the cache file. If we've been asked to,
+     * create it if it doesn't exist.
+     */
+    fd = createIfMissing ? open(cacheFileName, O_CREAT|O_RDWR, 0644) : -1;
+    if (fd < 0) {
+        /* fall back to read-only; we may still be able to use the cache */
+        fd = open(cacheFileName, O_RDONLY, 0);
+        if (fd < 0) {
+            if (createIfMissing) {
+                LOGE("Can't open dex cache '%s': %s\n",
+                    cacheFileName, strerror(errno));
+            }
+            return fd;
+        }
+        readOnly = true;
+    }
+
+    /*
+     * Grab an exclusive lock on the cache file. If somebody else is
+     * working on it, we'll block here until they complete. Because
+     * we're waiting on an external resource, we go into VMWAIT mode.
+     */
+    int oldStatus;
+    LOGV("DexOpt: locking cache file %s (fd=%d, boot=%d)\n",
+        cacheFileName, fd, isBootstrap);
+    oldStatus = dvmChangeStatus(NULL, THREAD_VMWAIT);
+    /* try non-blocking first so we only log when we actually have to wait */
+    cc = flock(fd, LOCK_EX | LOCK_NB);
+    if (cc != 0) {
+        LOGD("DexOpt: sleeping on flock(%s)\n", cacheFileName);
+        cc = flock(fd, LOCK_EX);
+    }
+    dvmChangeStatus(NULL, oldStatus);
+    if (cc != 0) {
+        LOGE("Can't lock dex cache '%s': %d\n", cacheFileName, cc);
+        close(fd);
+        return -1;
+    }
+    LOGV("DexOpt: locked cache file\n");
+
+    /*
+     * Check to see if the fd we opened and locked matches the file in
+     * the filesystem. If they don't, then somebody else unlinked ours
+     * and created a new file, and we need to use that one instead. (If
+     * we caught them between the unlink and the create, we'll get an
+     * ENOENT from the file stat.)
+     */
+    cc = fstat(fd, &fdStat);
+    if (cc != 0) {
+        LOGE("Can't stat open file '%s'\n", cacheFileName);
+        LOGVV("DexOpt: unlocking cache file %s\n", cacheFileName);
+        goto close_fail;
+    }
+    cc = stat(cacheFileName, &fileStat);
+    if (cc != 0 ||
+        fdStat.st_dev != fileStat.st_dev || fdStat.st_ino != fileStat.st_ino)
+    {
+        LOGD("DexOpt: our open cache file is stale; sleeping and retrying\n");
+        LOGVV("DexOpt: unlocking cache file %s\n", cacheFileName);
+        flock(fd, LOCK_UN);
+        close(fd);
+        usleep(250 * 1000);     /* if something is hosed, don't peg machine */
+        goto retry;
+    }
+
+    /*
+     * We have the correct file open and locked. If the file size is zero,
+     * then it was just created by us, and we want to fill in some fields
+     * in the "opt" header and set "*pNewFile". Otherwise, we want to
+     * verify that the fields in the header match our expectations, and
+     * reset the file if they don't.
+     */
+    if (fdStat.st_size == 0) {
+        if (readOnly) {
+            LOGW("DexOpt: file has zero length and isn't writable\n");
+            goto close_fail;
+        }
+        cc = dexOptCreateEmptyHeader(fd);
+        if (cc != 0)
+            goto close_fail;
+        *pNewFile = true;
+        LOGV("DexOpt: successfully initialized new cache file\n");
+    } else {
+        bool expectVerify, expectOpt;
+
+        /* derive the verify/opt expectations from the global VM config */
+        if (gDvm.classVerifyMode == VERIFY_MODE_NONE)
+            expectVerify = false;
+        else if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE)
+            expectVerify = !isBootstrap;
+        else /*if (gDvm.classVerifyMode == VERIFY_MODE_ALL)*/
+            expectVerify = true;
+
+        if (gDvm.dexOptMode == OPTIMIZE_MODE_NONE)
+            expectOpt = false;
+        else if (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED)
+            expectOpt = expectVerify;
+        else /*if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)*/
+            expectOpt = true;
+
+        LOGV("checking deps, expecting vfy=%d opt=%d\n",
+            expectVerify, expectOpt);
+
+        if (!dvmCheckOptHeaderAndDependencies(fd, true, modWhen, crc,
+                expectVerify, expectOpt))
+        {
+            if (readOnly) {
+                /*
+                 * We could unlink and rewrite the file if we own it or
+                 * the "sticky" bit isn't set on the directory. However,
+                 * we're not able to truncate it, which spoils things. So,
+                 * give up now.
+                 */
+                if (createIfMissing) {
+                    LOGW("Cached DEX '%s' (%s) is stale and not writable\n",
+                        fileName, cacheFileName);
+                }
+                goto close_fail;
+            }
+
+            /*
+             * If we truncate the existing file before unlinking it, any
+             * process that has it mapped will fail when it tries to touch
+             * the pages.
+             *
+             * This is very important. The zygote process will have the
+             * boot DEX files (core, framework, etc.) mapped early. If
+             * (say) core.dex gets updated, and somebody launches an app
+             * that uses App.dex, then App.dex gets reoptimized because it's
+             * dependent upon the boot classes. However, dexopt will be
+             * using the *new* core.dex to do the optimizations, while the
+             * app will actually be running against the *old* core.dex
+             * because it starts from zygote.
+             *
+             * Even without zygote, it's still possible for a class loader
+             * to pull in an APK that was optimized against an older set
+             * of DEX files. We must ensure that everything fails when a
+             * boot DEX gets updated, and for general "why aren't my
+             * changes doing anything" purposes it's best if we just make
+             * everything crash when a DEX they're using gets updated.
+             */
+            LOGD("Stale deps in cache file; removing and retrying\n");
+            if (ftruncate(fd, 0) != 0) {
+                LOGW("Warning: unable to truncate cache file '%s': %s\n",
+                    cacheFileName, strerror(errno));
+                /* keep going */
+            }
+            if (unlink(cacheFileName) != 0) {
+                LOGW("Warning: unable to remove cache file '%s': %d %s\n",
+                    cacheFileName, errno, strerror(errno));
+                /* keep going; permission failure should probably be fatal */
+            }
+            LOGVV("DexOpt: unlocking cache file %s\n", cacheFileName);
+            flock(fd, LOCK_UN);
+            close(fd);
+            goto retry;
+        } else {
+            LOGV("DexOpt: good deps in cache file\n");
+        }
+    }
+
+    assert(fd >= 0);
+    return fd;
+
+close_fail:
+    /* shared failure exit: drop the lock and release the descriptor */
+    flock(fd, LOCK_UN);
+    close(fd);
+    return -1;
+}
+
+/*
+ * Unlock the file descriptor.
+ *
+ * Returns "true" on success.
+ */
+bool dvmUnlockCachedDexFile(int fd)
+{
+    int cc;
+
+    LOGVV("DexOpt: unlocking cache file fd=%d\n", fd);
+    cc = flock(fd, LOCK_UN);
+    return cc == 0;
+}
+
+
+/*
+ * Given a descriptor for a file with DEX data in it, produce an
+ * optimized version.
+ *
+ * The file pointed to by "fd" is expected to be a locked shared resource
+ * (or private); we make no efforts to enforce multi-process correctness
+ * here.
+ *
+ * "fileName" is only used for debug output. "modWhen" and "crc" are stored
+ * in the dependency set.
+ *
+ * The "isBootstrap" flag determines how the optimizer and verifier handle
+ * package-scope access checks. When optimizing, we only load the bootstrap
+ * class DEX files and the target DEX, so the flag determines whether the
+ * target DEX classes are given a (synthetic) non-NULL classLoader pointer.
+ * This only really matters if the target DEX contains classes that claim to
+ * be in the same package as bootstrap classes.
+ *
+ * The optimizer will need to load every class in the target DEX file.
+ * This is generally undesirable, so we start a subprocess to do the
+ * work and wait for it to complete.
+ *
+ * Returns "true" on success. All data will have been written to "fd".
+ */
+bool dvmOptimizeDexFile(int fd, off_t dexOffset, long dexLength,
+    const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
+{
+    /* trim the path down to the base name for log messages */
+    const char* lastPart = strrchr(fileName, '/');
+    if (lastPart != NULL)
+        lastPart++;
+    else
+        lastPart = fileName;
+
+    /*
+     * For basic optimizations (byte-swapping and structure aligning) we
+     * don't need to fork(). It looks like fork+exec is causing problems
+     * with gdb on our bewildered Linux distro, so in some situations we
+     * want to avoid this.
+     *
+     * For optimization and/or verification, we need to load all the classes.
+     *
+     * We don't check gDvm.generateRegisterMaps, since that is dependent
+     * upon the verifier state.
+     */
+    if (gDvm.classVerifyMode == VERIFY_MODE_NONE &&
+        (gDvm.dexOptMode == OPTIMIZE_MODE_NONE ||
+         gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED))
+    {
+        LOGD("DexOpt: --- BEGIN (quick) '%s' ---\n", lastPart);
+        return dvmContinueOptimization(fd, dexOffset, dexLength,
+                fileName, modWhen, crc, isBootstrap);
+    }
+
+
+    LOGD("DexOpt: --- BEGIN '%s' (bootstrap=%d) ---\n", lastPart, isBootstrap);
+
+    pid_t pid;
+
+    /*
+     * This could happen if something in our bootclasspath, which we thought
+     * was all optimized, got rejected.
+     */
+    if (gDvm.optimizing) {
+        LOGW("Rejecting recursive optimization attempt on '%s'\n", fileName);
+        return false;
+    }
+
+    pid = fork();
+    if (pid < 0) {
+        /*
+         * fork() failed. Without this check we'd fall through to the
+         * parent path and call waitpid(-1, ...), which reaps an arbitrary
+         * child process.
+         */
+        LOGE("fork failed: %s\n", strerror(errno));
+        return false;
+    }
+    if (pid == 0) {
+        /* child: exec the stand-alone "dexopt" helper */
+        static const int kUseValgrind = 0;
+        static const char* kDexOptBin = "/bin/dexopt";
+        static const char* kValgrinder = "/usr/bin/valgrind";
+        static const int kFixedArgCount = 10;
+        static const int kValgrindArgCount = 5;
+        static const int kMaxIntLen = 12;   // '-'+10dig+'\0' -OR- 0x+8dig
+        int bcpSize = dvmGetBootPathSize();
+        int argc = kFixedArgCount + bcpSize
+            + (kValgrindArgCount * kUseValgrind);
+        char* argv[argc+1];             // last entry is NULL
+        char values[argc][kMaxIntLen];
+        char* execFile;
+        char* androidRoot;
+        int flags;
+
+        /* full path to optimizer */
+        androidRoot = getenv("ANDROID_ROOT");
+        if (androidRoot == NULL) {
+            LOGW("ANDROID_ROOT not set, defaulting to /system\n");
+            androidRoot = "/system";
+        }
+        execFile = malloc(strlen(androidRoot) + strlen(kDexOptBin) + 1);
+        if (execFile == NULL) {
+            /* allocation failure in the child; nothing to clean up */
+            LOGE("Unable to allocate dexopt path\n");
+            exit(1);
+        }
+        strcpy(execFile, androidRoot);
+        strcat(execFile, kDexOptBin);
+
+        /*
+         * Create arg vector. Note the values[] indices below are fixed
+         * storage slots, not argv positions; they don't track curArg.
+         */
+        int curArg = 0;
+
+        if (kUseValgrind) {
+            /* probably shouldn't ship the hard-coded path */
+            argv[curArg++] = (char*)kValgrinder;
+            argv[curArg++] = "--tool=memcheck";
+            argv[curArg++] = "--leak-check=yes";        // check for leaks too
+            argv[curArg++] = "--leak-resolution=med";   // increase from 2 to 4
+            argv[curArg++] = "--num-callers=16";        // default is 12
+            assert(curArg == kValgrindArgCount);
+        }
+        argv[curArg++] = execFile;
+
+        argv[curArg++] = "--dex";
+
+        sprintf(values[2], "%d", DALVIK_VM_BUILD);
+        argv[curArg++] = values[2];
+
+        sprintf(values[3], "%d", fd);
+        argv[curArg++] = values[3];
+
+        sprintf(values[4], "%d", (int) dexOffset);
+        argv[curArg++] = values[4];
+
+        sprintf(values[5], "%d", (int) dexLength);
+        argv[curArg++] = values[5];
+
+        argv[curArg++] = (char*)fileName;
+
+        sprintf(values[7], "%d", (int) modWhen);
+        argv[curArg++] = values[7];
+
+        sprintf(values[8], "%d", (int) crc);
+        argv[curArg++] = values[8];
+
+        /* fold VM configuration into the DEXOPT_* flag word */
+        flags = 0;
+        if (gDvm.dexOptMode != OPTIMIZE_MODE_NONE) {
+            flags |= DEXOPT_OPT_ENABLED;
+            if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)
+                flags |= DEXOPT_OPT_ALL;
+        }
+        if (gDvm.classVerifyMode != VERIFY_MODE_NONE) {
+            flags |= DEXOPT_VERIFY_ENABLED;
+            if (gDvm.classVerifyMode == VERIFY_MODE_ALL)
+                flags |= DEXOPT_VERIFY_ALL;
+        }
+        if (isBootstrap)
+            flags |= DEXOPT_IS_BOOTSTRAP;
+        if (gDvm.generateRegisterMaps)
+            flags |= DEXOPT_GEN_REGISTER_MAP;
+        sprintf(values[9], "%d", flags);
+        argv[curArg++] = values[9];
+
+        assert(((!kUseValgrind && curArg == kFixedArgCount) ||
+               ((kUseValgrind && curArg == kFixedArgCount+kValgrindArgCount))));
+
+        /* append the bootclasspath entries */
+        ClassPathEntry* cpe;
+        for (cpe = gDvm.bootClassPath; cpe->ptr != NULL; cpe++) {
+            argv[curArg++] = cpe->fileName;
+        }
+        assert(curArg == argc);
+
+        argv[curArg] = NULL;
+
+        if (kUseValgrind)
+            execv(kValgrinder, argv);
+        else
+            execv(execFile, argv);
+
+        /* only reached if execv failed */
+        LOGE("execv '%s'%s failed: %s\n", execFile,
+            kUseValgrind ? " [valgrind]" : "", strerror(errno));
+        exit(1);
+    } else {
+        LOGV("DexOpt: waiting for verify+opt, pid=%d\n", (int) pid);
+        int status;
+        pid_t gotPid;
+        int oldStatus;
+
+        /*
+         * Wait for the optimization process to finish. We go into VMWAIT
+         * mode here so GC suspension won't have to wait for us.
+         */
+        oldStatus = dvmChangeStatus(NULL, THREAD_VMWAIT);
+        while (true) {
+            gotPid = waitpid(pid, &status, 0);
+            if (gotPid == -1 && errno == EINTR) {
+                LOGD("waitpid interrupted, retrying\n");
+            } else {
+                break;
+            }
+        }
+        dvmChangeStatus(NULL, oldStatus);
+        if (gotPid != pid) {
+            LOGE("waitpid failed: wanted %d, got %d: %s\n",
+                (int) pid, (int) gotPid, strerror(errno));
+            return false;
+        }
+
+        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+            LOGD("DexOpt: --- END '%s' (success) ---\n", lastPart);
+            return true;
+        } else {
+            LOGW("DexOpt: --- END '%s' --- status=0x%04x, process failed\n",
+                lastPart, status);
+            return false;
+        }
+    }
+}
+
+/*
+ * Do the actual optimization. This is called directly for "minimal"
+ * optimization, or from a newly-created process for "full" optimization.
+ *
+ * For best use of disk/memory, we want to extract once and perform
+ * optimizations in place. If the file has to expand or contract
+ * to match local structure padding/alignment expectations, we want
+ * to do the rewrite as part of the extract, rather than extracting
+ * into a temp file and slurping it back out. (The structure alignment
+ * is currently correct for all platforms, and this isn't expected to
+ * change, so we should be okay with having it already extracted.)
+ *
+ * Returns "true" on success.
+ */
+bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
+    const char* fileName, u4 modWhen, u4 crc, bool isBootstrap)
+{
+    DexClassLookup* pClassLookup = NULL;
+    IndexMapSet* pIndexMapSet = NULL;
+    bool doVerify, doOpt;
+    u4 headerFlags = 0;
+
+    /* derive verify/opt decisions from the global VM config */
+    if (gDvm.classVerifyMode == VERIFY_MODE_NONE)
+        doVerify = false;
+    else if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE)
+        doVerify = !isBootstrap;
+    else /*if (gDvm.classVerifyMode == VERIFY_MODE_ALL)*/
+        doVerify = true;
+
+    if (gDvm.dexOptMode == OPTIMIZE_MODE_NONE)
+        doOpt = false;
+    else if (gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED)
+        doOpt = doVerify;
+    else /*if (gDvm.dexOptMode == OPTIMIZE_MODE_ALL)*/
+        doOpt = true;
+
+    LOGV("Continuing optimization (%s, isb=%d, vfy=%d, opt=%d)\n",
+        fileName, isBootstrap, doVerify, doOpt);
+
+    assert(dexOffset >= 0);
+
+    /* quick test so we don't blow up on empty file */
+    if (dexLength < (int) sizeof(DexHeader)) {
+        LOGE("too small to be DEX\n");
+        return false;
+    }
+    if (dexOffset < (int) sizeof(DexOptHeader)) {
+        LOGE("not enough room for opt header\n");
+        return false;
+    }
+
+    bool result = false;
+
+    /*
+     * Drop this into a global so we don't have to pass it around. We could
+     * also add a field to DexFile, but since it only pertains to DEX
+     * creation that probably doesn't make sense.
+     */
+    gDvm.optimizingBootstrapClass = isBootstrap;
+
+    {
+        /*
+         * Map the entire file (so we don't have to worry about page
+         * alignment). The expectation is that the output file contains
+         * our DEX data plus room for a small header.
+         */
+        bool success;
+        void* mapAddr;
+        mapAddr = mmap(NULL, dexOffset + dexLength, PROT_READ|PROT_WRITE,
+                    MAP_SHARED, fd, 0);
+        if (mapAddr == MAP_FAILED) {
+            LOGE("unable to mmap DEX cache: %s\n", strerror(errno));
+            goto bail;
+        }
+
+        /*
+         * Rewrite the file. Byte reordering, structure realigning,
+         * class verification, and bytecode optimization are all performed
+         * here.
+         */
+        success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
+                    doVerify, doOpt, &headerFlags, &pClassLookup);
+
+        if (success) {
+            DvmDex* pDvmDex = NULL;
+            u1* dexAddr = ((u1*) mapAddr) + dexOffset;
+
+            if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
+                LOGE("Unable to create DexFile\n");
+            } else {
+                /*
+                 * If configured to do so, scan the instructions, looking
+                 * for ways to reduce the size of the resolved-constant table.
+                 * This is done post-optimization, across the instructions
+                 * in all methods in all classes (even the ones that failed
+                 * to load).
+                 */
+                pIndexMapSet = dvmRewriteConstants(pDvmDex);
+
+                /* the rewrites above invalidated the stored checksum */
+                updateChecksum(dexAddr, dexLength,
+                    (DexHeader*) pDvmDex->pHeader);
+
+                dvmDexFileFree(pDvmDex);
+            }
+        }
+
+        /* unmap the read-write version, forcing writes to disk */
+        if (msync(mapAddr, dexOffset + dexLength, MS_SYNC) != 0) {
+            LOGW("msync failed: %s\n", strerror(errno));
+            // weird, but keep going
+        }
+#if 1
+        /*
+         * This causes clean shutdown to fail, because we have loaded classes
+         * that point into it. For the optimizer this isn't a problem,
+         * because it's more efficient for the process to simply exit.
+         * Exclude this code when doing clean shutdown for valgrind.
+         */
+        if (munmap(mapAddr, dexOffset + dexLength) != 0) {
+            LOGE("munmap failed: %s\n", strerror(errno));
+            goto bail;
+        }
+#endif
+
+        if (!success)
+            goto bail;
+    }
+
+    /* get start offset, and adjust deps start for 64-bit alignment */
+    off_t depsOffset, auxOffset, endOffset, adjOffset;
+    int depsLength, auxLength;
+
+    depsOffset = lseek(fd, 0, SEEK_END);
+    if (depsOffset < 0) {
+        LOGE("lseek to EOF failed: %s\n", strerror(errno));
+        goto bail;
+    }
+    adjOffset = (depsOffset + 7) & ~(0x07);
+    if (adjOffset != depsOffset) {
+        LOGV("Adjusting deps start from %d to %d\n",
+            (int) depsOffset, (int) adjOffset);
+        depsOffset = adjOffset;
+        lseek(fd, depsOffset, SEEK_SET);
+    }
+
+    /*
+     * Append the dependency list.
+     */
+    if (writeDependencies(fd, modWhen, crc) != 0) {
+        LOGW("Failed writing dependencies\n");
+        goto bail;
+    }
+
+
+    /* compute deps length, and adjust aux start for 64-bit alignment */
+    auxOffset = lseek(fd, 0, SEEK_END);
+    depsLength = auxOffset - depsOffset;
+
+    adjOffset = (auxOffset + 7) & ~(0x07);
+    if (adjOffset != auxOffset) {
+        LOGV("Adjusting aux start from %d to %d\n",
+            (int) auxOffset, (int) adjOffset);
+        auxOffset = adjOffset;
+        lseek(fd, auxOffset, SEEK_SET);
+    }
+
+    /*
+     * Append any auxiliary pre-computed data structures.
+     */
+    if (!writeAuxData(fd, pClassLookup, pIndexMapSet)) {
+        LOGW("Failed writing aux data\n");
+        goto bail;
+    }
+
+    endOffset = lseek(fd, 0, SEEK_END);
+    auxLength = endOffset - auxOffset;
+
+    /*
+     * Output the "opt" header with all values filled in and a correct
+     * magic number. Writing the header last makes an interrupted run
+     * detectable (the magic will be missing or wrong).
+     */
+    DexOptHeader optHdr;
+    memset(&optHdr, 0xff, sizeof(optHdr));
+    memcpy(optHdr.magic, DEX_OPT_MAGIC, 4);
+    memcpy(optHdr.magic+4, DEX_OPT_MAGIC_VERS, 4);
+    optHdr.dexOffset = (u4) dexOffset;
+    optHdr.dexLength = (u4) dexLength;
+    optHdr.depsOffset = (u4) depsOffset;
+    optHdr.depsLength = (u4) depsLength;
+    optHdr.auxOffset = (u4) auxOffset;
+    optHdr.auxLength = (u4) auxLength;
+
+    optHdr.flags = headerFlags;
+
+    ssize_t actual;
+    lseek(fd, 0, SEEK_SET);
+    actual = write(fd, &optHdr, sizeof(optHdr));
+    if (actual != sizeof(optHdr)) {
+        logFailedWrite(sizeof(optHdr), actual, "opt header", errno);
+        goto bail;
+    }
+
+    LOGV("Successfully wrote DEX header\n");
+    result = true;
+
+bail:
+    dvmFreeIndexMapSet(pIndexMapSet);
+    free(pClassLookup);
+    return result;
+}
+
+
+/*
+ * Get the cache file name from a ClassPathEntry.
+ */
+static const char* getCacheFileName(const ClassPathEntry* cpe)
+{
+    const char* name;
+
+    if (cpe->kind == kCpeJar) {
+        name = dvmGetJarFileCacheFileName((JarFile*) cpe->ptr);
+    } else if (cpe->kind == kCpeDex) {
+        name = dvmGetRawDexFileCacheFileName((RawDexFile*) cpe->ptr);
+    } else {
+        LOGE("DexOpt: unexpected cpe kind %d\n", cpe->kind);
+        dvmAbort();
+        name = NULL;
+    }
+
+    return name;
+}
+
+/*
+ * Get the SHA-1 signature.
+ */
+static const u1* getSignature(const ClassPathEntry* cpe)
+{
+    DvmDex* pDvmDex = NULL;     // initialized to keep gcc happy
+
+    if (cpe->kind == kCpeJar) {
+        pDvmDex = dvmGetJarFileDex((JarFile*) cpe->ptr);
+    } else if (cpe->kind == kCpeDex) {
+        pDvmDex = dvmGetRawDexFileDex((RawDexFile*) cpe->ptr);
+    } else {
+        LOGE("unexpected cpe kind %d\n", cpe->kind);
+        dvmAbort();
+    }
+
+    assert(pDvmDex != NULL);
+    return pDvmDex->pDexFile->pHeader->signature;
+}
+
+
+/*
+ * Dependency layout:
+ *  4b Source file modification time, in seconds since 1970 UTC
+ *  4b CRC-32 from Zip entry, or Adler32 from source DEX header
+ *  4b Dalvik VM build number
+ *  4b Number of dependency entries that follow
+ *  Dependency entries:
+ *  4b Name length (including terminating null)
+ *  var Full path of cache entry (null terminated)
+ *  20b SHA-1 signature from source DEX file
+ *
+ * If this changes, update DEX_OPT_MAGIC_VERS.
+ */
+static const size_t kMinDepSize = 4 * 4;            /* fixed header only */
+static const size_t kMaxDepSize = 4 * 4 + 1024;     // sanity check
+
+/*
+ * Read the "opt" header, verify it, then read the dependencies section
+ * and verify that data as well.
+ *
+ * If "sourceAvail" is "true", this will verify that "modWhen" and "crc"
+ * match up with what is stored in the header. If they don't, we reject
+ * the file so that it can be recreated from the updated original. If
+ * "sourceAvail" isn't set, e.g. for a .odex file, we ignore these arguments.
+ *
+ * On successful return, the file will be seeked immediately past the
+ * "opt" header.
+ */
+bool dvmCheckOptHeaderAndDependencies(int fd, bool sourceAvail, u4 modWhen,
+    u4 crc, bool expectVerify, bool expectOpt)
+{
+    DexOptHeader optHdr;
+    u1* depData = NULL;
+    const u1* magic;
+    off_t posn;
+    bool result = false;    /* was "int"; this function returns bool */
+    ssize_t actual;
+
+    /*
+     * Start at the start. The "opt" header, when present, will always be
+     * the first thing in the file.
+     */
+    if (lseek(fd, 0, SEEK_SET) != 0) {
+        LOGE("DexOpt: failed to seek to start of file: %s\n", strerror(errno));
+        goto bail;
+    }
+
+    /*
+     * Read and do trivial verification on the opt header. The header is
+     * always in host byte order.
+     */
+    if (read(fd, &optHdr, sizeof(optHdr)) != sizeof(optHdr)) {
+        LOGE("DexOpt: failed reading opt header: %s\n", strerror(errno));
+        goto bail;
+    }
+
+    magic = optHdr.magic;
+    if (memcmp(magic, DEX_OPT_MAGIC, 4) != 0) {
+        /* not a DEX file, or previous attempt was interrupted */
+        LOGD("DexOpt: incorrect opt magic number (0x%02x %02x %02x %02x)\n",
+            magic[0], magic[1], magic[2], magic[3]);
+        goto bail;
+    }
+    if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
+        LOGW("DexOpt: stale opt version (0x%02x %02x %02x %02x)\n",
+            magic[4], magic[5], magic[6], magic[7]);
+        goto bail;
+    }
+    if (optHdr.depsLength < kMinDepSize || optHdr.depsLength > kMaxDepSize) {
+        LOGW("DexOpt: weird deps length %d, bailing\n", optHdr.depsLength);
+        goto bail;
+    }
+
+    /*
+     * Do the header flags match up with what we want?
+     *
+     * This is useful because it allows us to automatically regenerate
+     * a file when settings change (e.g. verification is now mandatory),
+     * but can cause difficulties if the bootstrap classes we depend upon
+     * were handled differently than the current options specify. We get
+     * upset because they're not verified or optimized, but we're not able
+     * to regenerate them because the installer won't let us.
+     *
+     * (This is also of limited value when !sourceAvail.)
+     *
+     * So, for now, we essentially ignore "expectVerify" and "expectOpt"
+     * by limiting the match mask.
+     *
+     * The only thing we really can't handle is incorrect byte-ordering.
+     */
+    const u4 matchMask = DEX_OPT_FLAG_BIG;
+    u4 expectedFlags = 0;
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+    expectedFlags |= DEX_OPT_FLAG_BIG;
+#endif
+    if (expectVerify)
+        expectedFlags |= DEX_FLAG_VERIFIED;
+    if (expectOpt)
+        expectedFlags |= DEX_OPT_FLAG_FIELDS | DEX_OPT_FLAG_INVOCATIONS;
+    if ((expectedFlags & matchMask) != (optHdr.flags & matchMask)) {
+        LOGI("DexOpt: header flag mismatch (0x%02x vs 0x%02x, mask=0x%02x)\n",
+            expectedFlags, optHdr.flags, matchMask);
+        goto bail;
+    }
+
+    posn = lseek(fd, optHdr.depsOffset, SEEK_SET);
+    if (posn < 0) {
+        LOGW("DexOpt: seek to deps failed: %s\n", strerror(errno));
+        goto bail;
+    }
+
+    /*
+     * Read all of the dependency stuff into memory. depsLength has been
+     * bounded by kMaxDepSize above, so the allocation is modest.
+     */
+    depData = (u1*) malloc(optHdr.depsLength);
+    if (depData == NULL) {
+        LOGW("DexOpt: unable to allocate %d bytes for deps\n",
+            optHdr.depsLength);
+        goto bail;
+    }
+    actual = read(fd, depData, optHdr.depsLength);
+    if (actual != (ssize_t) optHdr.depsLength) {
+        LOGW("DexOpt: failed reading deps: %d of %d (err=%s)\n",
+            (int) actual, optHdr.depsLength, strerror(errno));
+        goto bail;
+    }
+
+    /*
+     * Verify simple items.
+     */
+    const u1* ptr;
+    u4 val;
+
+    ptr = depData;
+    val = read4LE(&ptr);
+    if (sourceAvail && val != modWhen) {
+        LOGI("DexOpt: source file mod time mismatch (%08x vs %08x)\n",
+            val, modWhen);
+        goto bail;
+    }
+    val = read4LE(&ptr);
+    if (sourceAvail && val != crc) {
+        LOGI("DexOpt: source file CRC mismatch (%08x vs %08x)\n", val, crc);
+        goto bail;
+    }
+    val = read4LE(&ptr);
+    if (val != DALVIK_VM_BUILD) {
+        LOGI("DexOpt: VM build mismatch (%d vs %d)\n", val, DALVIK_VM_BUILD);
+        goto bail;
+    }
+
+    /*
+     * Verify dependencies on other cached DEX files. It must match
+     * exactly with what is currently defined in the bootclasspath.
+     *
+     * NOTE(review): "storedStrLen" is trusted when advancing "ptr"; a
+     * corrupt cache file could walk past the end of depData. The data is
+     * written by us and bounded by kMaxDepSize, but hardening this would
+     * not hurt.
+     */
+    ClassPathEntry* cpe;
+    u4 numDeps;
+
+    numDeps = read4LE(&ptr);
+    LOGV("+++ DexOpt: numDeps = %d\n", numDeps);
+    for (cpe = gDvm.bootClassPath; cpe->ptr != NULL; cpe++) {
+        const char* cacheFileName = getCacheFileName(cpe);
+        const u1* signature = getSignature(cpe);
+        size_t len = strlen(cacheFileName) +1;
+        u4 storedStrLen;
+
+        if (numDeps == 0) {
+            /* more entries in bootclasspath than in deps list */
+            LOGI("DexOpt: not all deps represented\n");
+            goto bail;
+        }
+
+        storedStrLen = read4LE(&ptr);
+        if (len != storedStrLen ||
+            strcmp(cacheFileName, (const char*) ptr) != 0)
+        {
+            LOGI("DexOpt: mismatch dep name: '%s' vs. '%s'\n",
+                cacheFileName, ptr);
+            goto bail;
+        }
+
+        ptr += storedStrLen;
+
+        if (memcmp(signature, ptr, kSHA1DigestLen) != 0) {
+            LOGI("DexOpt: mismatch dep signature for '%s'\n", cacheFileName);
+            goto bail;
+        }
+        ptr += kSHA1DigestLen;
+
+        LOGV("DexOpt: dep match on '%s'\n", cacheFileName);
+
+        numDeps--;
+    }
+
+    if (numDeps != 0) {
+        /* more entries in deps list than in classpath */
+        LOGI("DexOpt: Some deps went away\n");
+        goto bail;
+    }
+
+    // consumed all data and no more?
+    if (ptr != depData + optHdr.depsLength) {
+        LOGW("DexOpt: Spurious dep data? %d vs %d\n",
+            (int) (ptr - depData), optHdr.depsLength);
+        assert(false);
+    }
+
+    result = true;
+
+bail:
+    free(depData);
+    return result;
+}
+
+/*
+ * Write the dependency info to "fd" at the current file position.
+ */
+/*
+ * Write the dependency info to "fd" at the current file position.
+ *
+ * Returns 0 on success, or an errno value / -1 on failure.
+ */
+static int writeDependencies(int fd, u4 modWhen, u4 crc)
+{
+    u1* buf = NULL;
+    ssize_t actual;
+    int result = -1;
+    ssize_t bufLen;
+    ClassPathEntry* cpe;
+    int numDeps;
+
+    /*
+     * Count up the number of completed entries in the bootclasspath.
+     */
+    numDeps = 0;
+    bufLen = 0;
+    for (cpe = gDvm.bootClassPath; cpe->ptr != NULL; cpe++) {
+        const char* cacheFileName = getCacheFileName(cpe);
+        LOGV("+++ DexOpt: found dep '%s'\n", cacheFileName);
+
+        numDeps++;
+        bufLen += strlen(cacheFileName) +1;
+    }
+
+    /* fixed header plus per-entry length word and SHA-1 digest */
+    bufLen += 4*4 + numDeps * (4+kSHA1DigestLen);
+
+    buf = malloc(bufLen);
+    if (buf == NULL) {
+        /* previously unchecked; set4LE below would deref NULL */
+        LOGE("DexOpt: unable to allocate %d bytes for deps\n", (int) bufLen);
+        return -1;
+    }
+
+    set4LE(buf+0, modWhen);
+    set4LE(buf+4, crc);
+    set4LE(buf+8, DALVIK_VM_BUILD);
+    set4LE(buf+12, numDeps);
+
+    // TODO: do we want to add dvmGetInlineOpsTableLength() here? Won't
+    // help us if somebody replaces an existing entry, but it'd catch
+    // additions/removals.
+
+    u1* ptr = buf + 4*4;
+    for (cpe = gDvm.bootClassPath; cpe->ptr != NULL; cpe++) {
+        const char* cacheFileName = getCacheFileName(cpe);
+        const u1* signature = getSignature(cpe);
+        int len = strlen(cacheFileName) +1;
+
+        if (ptr + 4 + len + kSHA1DigestLen > buf + bufLen) {
+            LOGE("DexOpt: overran buffer\n");
+            dvmAbort();
+        }
+
+        set4LE(ptr, len);
+        ptr += 4;
+        memcpy(ptr, cacheFileName, len);
+        ptr += len;
+        memcpy(ptr, signature, kSHA1DigestLen);
+        ptr += kSHA1DigestLen;
+    }
+
+    assert(ptr == buf + bufLen);
+
+    actual = write(fd, buf, bufLen);
+    if (actual != bufLen) {
+        result = (errno != 0) ? errno : -1;
+        logFailedWrite(bufLen, actual, "dep info", errno);
+    } else {
+        result = 0;
+    }
+
+    free(buf);
+    return result;
+}
+
+
+/*
+ * Write a block of data in "chunk" format.
+ *
+ * The chunk header fields are always in "native" byte order. If "size"
+ * is not a multiple of 8 bytes, the data area is padded out.
+ */
+static bool writeChunk(int fd, u4 type, const void* data, size_t size)
+{
+    ssize_t actual;
+    union { /* save a syscall by grouping these together */
+        char raw[8];
+        struct {
+            u4 type;
+            u4 size;
+        } ts;
+    } header;
+
+    assert(sizeof(header) == 8);
+
+    /* %.4s prints the type word as its four ASCII tag bytes */
+    LOGV("Writing chunk, type=%.4s size=%d\n", (char*) &type, size);
+
+    header.ts.type = type;
+    header.ts.size = (u4) size;
+    actual = write(fd, &header, sizeof(header));
+    if (actual != sizeof(header)) {
+        logFailedWrite(size, actual, "aux chunk header write", errno);
+        return false;
+    }
+
+    if (size > 0) {
+        actual = write(fd, data, size);
+        if (actual != (ssize_t) size) {
+            logFailedWrite(size, actual, "aux chunk write", errno);
+            return false;
+        }
+    }
+
+    /*
+     * If necessary, pad to 64-bit alignment. The seek alone writes
+     * nothing; the gap becomes zero-filled once the next chunk (or the
+     * end marker) is written past it.
+     */
+    if ((size & 7) != 0) {
+        int padSize = 8 - (size & 7);
+        LOGV("size was %d, inserting %d pad bytes\n", size, padSize);
+        lseek(fd, padSize, SEEK_CUR);
+    }
+
+    assert( ((int)lseek(fd, 0, SEEK_CUR) & 7) == 0);
+
+    return true;
+}
+
+/*
+ * Write aux data.
+ *
+ * We have different pieces, some of which may be optional.  To make the
+ * most effective use of space, we use a "chunk" format, with a 4-byte
+ * type and a 4-byte length.  We guarantee 64-bit alignment for the data,
+ * so it can be used directly when the file is mapped for reading.
+ *
+ * Returns "true" if every chunk (including the end marker) was written.
+ */
+static bool writeAuxData(int fd, const DexClassLookup* pClassLookup,
+    const IndexMapSet* pIndexMapSet)
+{
+    bool okay;
+
+    /* pre-computed class lookup hash table */
+    okay = writeChunk(fd, (u4) kDexChunkClassLookup, pClassLookup,
+            pClassLookup->size);
+
+    /* remapped constants (optional) */
+    if (okay && pIndexMapSet != NULL) {
+        okay = writeChunk(fd, pIndexMapSet->chunkType,
+                pIndexMapSet->chunkData, pIndexMapSet->chunkDataLen);
+    }
+
+    /* write the end marker */
+    if (okay)
+        okay = writeChunk(fd, (u4) kDexChunkEnd, NULL, 0);
+
+    return okay;
+}
+
+/*
+ * Log a failed or short write: what we were writing, how much actually
+ * made it out versus how much we expected, and the errno description.
+ */
+static void logFailedWrite(size_t expected, ssize_t actual, const char* msg,
+    int err)
+{
+    int wrote = (int) actual;
+    int wanted = (int) expected;
+
+    LOGE("Write failed: %s (%d of %d): %s\n",
+        msg, wrote, wanted, strerror(err));
+}
+
+
+/*
+ * ===========================================================================
+ * Optimizations
+ * ===========================================================================
+ */
+
+/*
+ * Perform in-place rewrites on a memory-mapped DEX file.
+ *
+ * "addr"/"len" describe the writable mapping.  On return, *pHeaderFlags
+ * holds the DEX_OPT_* / DEX_FLAG_* bits describing what was done, and
+ * *ppClassLookup holds a newly-created class lookup table (caller owns
+ * it; it may be set even when we return "false" after a later failure).
+ *
+ * This happens in a short-lived child process, so we can go nutty with
+ * loading classes and allocating memory -- nothing loaded here outlives
+ * the process.
+ *
+ * Returns "true" on success.
+ */
+static bool rewriteDex(u1* addr, int len, bool doVerify, bool doOpt,
+    u4* pHeaderFlags, DexClassLookup** ppClassLookup)
+{
+    u8 prepWhen, loadWhen, verifyWhen, optWhen;
+    DvmDex* pDvmDex = NULL;
+    bool result = false;
+
+    *pHeaderFlags = 0;
+
+    /* byte-swap the file contents in place (no-op data-wise on LE hosts) */
+    LOGV("+++ swapping bytes\n");
+    if (dexFixByteOrdering(addr, len) != 0)
+        goto bail;
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+    /* record in the opt header that the data was swapped on a BE host */
+    *pHeaderFlags |= DEX_OPT_FLAG_BIG;
+#endif
+
+    /*
+     * Now that the DEX file can be read directly, create a DexFile for it.
+     */
+    if (dvmDexFileOpenPartial(addr, len, &pDvmDex) != 0) {
+        LOGE("Unable to create DexFile\n");
+        goto bail;
+    }
+
+    /*
+     * Create the class lookup table.  The caller receives ownership
+     * through *ppClassLookup.
+     */
+    //startWhen = dvmGetRelativeTimeUsec();
+    *ppClassLookup = dexCreateClassLookup(pDvmDex->pDexFile);
+    if (*ppClassLookup == NULL)
+        goto bail;
+
+    /*
+     * Bail out early if they don't want The Works.  The current implementation
+     * doesn't fork a new process if this flag isn't set, so we really don't
+     * want to continue on with the crazy class loading.
+     */
+    if (!doVerify && !doOpt) {
+        result = true;
+        goto bail;
+    }
+
+    /* this is needed for the next part (class loading / verification) */
+    pDvmDex->pDexFile->pClassLookup = *ppClassLookup;
+
+    prepWhen = dvmGetRelativeTimeUsec();
+
+    /*
+     * Load all classes found in this DEX file.  If they fail to load for
+     * some reason, they won't get verified (which is as it should be).
+     */
+    if (!loadAllClasses(pDvmDex))
+        goto bail;
+    loadWhen = dvmGetRelativeTimeUsec();
+
+    /*
+     * Verify all classes in the DEX file.  Export the "is verified" flag
+     * to the DEX file we're creating.
+     */
+    if (doVerify) {
+        dvmVerifyAllClasses(pDvmDex->pDexFile);
+        *pHeaderFlags |= DEX_FLAG_VERIFIED;
+    }
+    verifyWhen = dvmGetRelativeTimeUsec();
+
+    /*
+     * Optimize the classes we successfully loaded.  If the opt mode is
+     * OPTIMIZE_MODE_VERIFIED, each class must have been successfully
+     * verified or we'll skip it.
+     */
+#ifndef PROFILE_FIELD_ACCESS
+    /* optimization rewrites field/invoke ops, defeating access profiling */
+    if (doOpt) {
+        optimizeLoadedClasses(pDvmDex->pDexFile);
+        *pHeaderFlags |= DEX_OPT_FLAG_FIELDS | DEX_OPT_FLAG_INVOCATIONS;
+    }
+#endif
+    optWhen = dvmGetRelativeTimeUsec();
+
+    LOGD("DexOpt: load %dms, verify %dms, opt %dms\n",
+        (int) (loadWhen - prepWhen) / 1000,
+        (int) (verifyWhen - loadWhen) / 1000,
+        (int) (optWhen - verifyWhen) / 1000);
+
+    result = true;
+
+bail:
+    /* free up storage; presumably safe on NULL if we failed early -- TODO confirm */
+    dvmDexFileFree(pDvmDex);
+
+    return result;
+}
+
+/*
+ * Update the Adler-32 checksum stored in the DEX file.  This covers the
+ * swapped and optimized DEX data, but does not include the opt header
+ * or auxillary data.
+ */
+static void updateChecksum(u1* addr, int len, DexHeader* pHeader)
+{
+    /*
+     * The sum skips the magic and the checksum field itself; the SHA-1
+     * signature is left untouched.
+     */
+    const int kNonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
+    uLong sum;
+
+    sum = adler32(0L, Z_NULL, 0);
+    sum = adler32(sum, addr + kNonSum, len - kNonSum);
+    pHeader->checksum = sum;
+}
+
+/*
+ * Try to load all classes in the specified DEX.  If they have some sort
+ * of broken dependency, e.g. their superclass lives in a different DEX
+ * that wasn't previously loaded into the bootstrap class path, loading
+ * will fail.  This is the desired behavior.
+ *
+ * We have no notion of class loader at this point, so we load all of
+ * the classes with the bootstrap class loader.  It turns out this has
+ * exactly the behavior we want, and has no ill side effects because we're
+ * running in a separate process and anything we load here will be forgotten.
+ *
+ * We set the CLASS_MULTIPLE_DEFS flag here if we see multiple definitions.
+ * This works because we only call here as part of optimization / pre-verify,
+ * not during verification as part of loading a class into a running VM.
+ *
+ * This returns "false" if the world is too screwed up to do anything
+ * useful at all.  Individual class-load failures are logged and skipped.
+ */
+static bool loadAllClasses(DvmDex* pDvmDex)
+{
+    u4 count = pDvmDex->pDexFile->pHeader->classDefsSize;
+    u4 idx;
+    int loaded = 0;
+
+    LOGV("DexOpt: +++ trying to load %d classes\n", count);
+
+    /* make this DEX visible to the bootstrap path for the duration */
+    dvmSetBootPathExtraDex(pDvmDex);
+
+    /*
+     * We have some circularity issues with Class and Object that are most
+     * easily avoided by ensuring that Object is never the first thing we
+     * try to find.  Take care of that here.  (We only need to do this when
+     * loading classes from the DEX file that contains Object, and only
+     * when Object comes first in the list, but it costs very little to
+     * do it in all cases.)
+     */
+    if (dvmFindSystemClass("Ljava/lang/Class;") == NULL) {
+        LOGE("ERROR: java.lang.Class does not exist!\n");
+        return false;
+    }
+
+    for (idx = 0; idx < count; idx++) {
+        const DexClassDef* pClassDef;
+        const char* classDescriptor;
+        ClassObject* newClass;
+
+        pClassDef = dexGetClassDef(pDvmDex->pDexFile, idx);
+        classDescriptor =
+            dexStringByTypeIdx(pDvmDex->pDexFile, pClassDef->classIdx);
+
+        LOGV("+++ loading '%s'", classDescriptor);
+        //newClass = dvmDefineClass(pDexFile, classDescriptor,
+        //        NULL);
+        newClass = dvmFindSystemClassNoInit(classDescriptor);
+        if (newClass == NULL) {
+            /* load failed; clear the pending exception and move on */
+            LOGV("DexOpt: failed loading '%s'\n", classDescriptor);
+            dvmClearOptException(dvmThreadSelf());
+        } else if (newClass->pDvmDex != pDvmDex) {
+            /*
+             * We don't load the new one, and we tag the first one found
+             * with the "multiple def" flag so the resolver doesn't try
+             * to make it available.
+             */
+            LOGD("DexOpt: '%s' has an earlier definition; blocking out\n",
+                classDescriptor);
+            SET_CLASS_FLAG(newClass, CLASS_MULTIPLE_DEFS);
+        } else {
+            loaded++;
+        }
+    }
+    LOGV("DexOpt: +++ successfully loaded %d classes\n", loaded);
+
+    /* detach this DEX from the bootstrap path again */
+    dvmSetBootPathExtraDex(NULL);
+    return true;
+}
+
+
+/*
+ * Create a table of inline substitutions.
+ *
+ * TODO: this is currently just a linear array. We will want to put this
+ * into a hash table as the list size increases.
+ */
+/*
+ * Create a table of inline substitutions: for each entry in the VM's
+ * inline-ops table whose class and method resolve, record the Method*
+ * and the inline index.  The returned array is terminated by an entry
+ * whose "method" field is NULL; the caller owns it and frees it with
+ * free().
+ *
+ * TODO: this is currently just a linear array.  We will want to put this
+ * into a hash table as the list size increases.
+ */
+static InlineSub* createInlineSubsTable(void)
+{
+    const InlineOperation* ops = dvmGetInlineOpsTable();
+    const int count = dvmGetInlineOpsTableLength();
+    InlineSub* table;
+    Method* method;
+    ClassObject* clazz;
+    int i, tableIndex;
+
+    /*
+     * Allocate for optimism: one slot per entry, plus an end-of-list marker.
+     * The original code stored through the pointer without checking it;
+     * fail loudly instead of crashing on the first write.
+     */
+    table = malloc(sizeof(InlineSub) * (count+1));
+    if (table == NULL) {
+        LOGE("DexOpt: unable to allocate inline sub table\n");
+        dvmAbort();
+    }
+
+    tableIndex = 0;
+    for (i = 0; i < count; i++) {
+        clazz = dvmFindClassNoInit(ops[i].classDescriptor, NULL);
+        if (clazz == NULL) {
+            /* class not present; skip entry and clear the exception */
+            LOGV("DexOpt: can't inline for class '%s': not found\n",
+                ops[i].classDescriptor);
+            dvmClearOptException(dvmThreadSelf());
+        } else {
+            /*
+             * Method could be virtual or direct.  Try both.  Don't use
+             * the "hier" versions.
+             */
+            method = dvmFindDirectMethodByDescriptor(clazz, ops[i].methodName,
+                        ops[i].methodSignature);
+            if (method == NULL)
+                method = dvmFindVirtualMethodByDescriptor(clazz,
+                            ops[i].methodName, ops[i].methodSignature);
+            if (method == NULL) {
+                LOGW("DexOpt: can't inline %s.%s %s: method not found\n",
+                    ops[i].classDescriptor, ops[i].methodName,
+                    ops[i].methodSignature);
+            } else {
+                /* inlining bypasses dispatch, so warn on overridable targets */
+                if (!dvmIsFinalClass(clazz) && !dvmIsFinalMethod(method)) {
+                    LOGW("DexOpt: WARNING: inline op on non-final class/method "
+                         "%s.%s\n",
+                        clazz->descriptor, method->name);
+                    /* fail? */
+                }
+                /* inlining also bypasses the monitor enter/exit */
+                if (dvmIsSynchronizedMethod(method) ||
+                    dvmIsDeclaredSynchronizedMethod(method))
+                {
+                    LOGW("DexOpt: WARNING: inline op on synchronized method "
+                         "%s.%s\n",
+                        clazz->descriptor, method->name);
+                    /* fail? */
+                }
+
+                table[tableIndex].method = method;
+                table[tableIndex].inlineIdx = i;
+                tableIndex++;
+
+                LOGV("DexOpt: will inline %d: %s.%s %s\n", i,
+                    ops[i].classDescriptor, ops[i].methodName,
+                    ops[i].methodSignature);
+            }
+        }
+    }
+
+    /* mark end of table */
+    table[tableIndex].method = NULL;
+    LOGV("DexOpt: inline table has %d entries\n", tableIndex);
+
+    return table;
+}
+
+/*
+ * Run through all classes that were successfully loaded from this DEX
+ * file and optimize their code sections.
+ */
+static void optimizeLoadedClasses(DexFile* pDexFile)
+{
+ u4 count = pDexFile->pHeader->classDefsSize;
+ u4 idx;
+ InlineSub* inlineSubs = NULL;
+
+ LOGV("DexOpt: +++ optimizing up to %d classes\n", count);
+ assert(gDvm.dexOptMode != OPTIMIZE_MODE_NONE);
+
+ inlineSubs = createInlineSubsTable();
+
+ for (idx = 0; idx < count; idx++) {
+ const DexClassDef* pClassDef;
+ const char* classDescriptor;
+ ClassObject* clazz;
+
+ pClassDef = dexGetClassDef(pDexFile, idx);
+ classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+ /* all classes are loaded into the bootstrap class loader */
+ clazz = dvmLookupClass(classDescriptor, NULL, false);
+ if (clazz != NULL) {
+ if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) == 0 &&
+ gDvm.dexOptMode == OPTIMIZE_MODE_VERIFIED)
+ {
+ LOGV("DexOpt: not optimizing '%s': not verified\n",
+ classDescriptor);
+ } else if (clazz->pDvmDex->pDexFile != pDexFile) {
+ /* shouldn't be here -- verifier should have caught */
+ LOGD("DexOpt: not optimizing '%s': multiple definitions\n",
+ classDescriptor);
+ } else {
+ optimizeClass(clazz, inlineSubs);
+
+ /* set the flag whether or not we actually did anything */
+ ((DexClassDef*)pClassDef)->accessFlags |=
+ CLASS_ISOPTIMIZED;
+ }
+ } else {
+ LOGV("DexOpt: not optimizing unavailable class '%s'\n",
+ classDescriptor);
+ }
+ }
+
+ free(inlineSubs);
+}
+
+/*
+ * Optimize the specified class.
+ */
+/*
+ * Optimize the specified class: run the optimizer over every direct and
+ * virtual method.  If any method fails, stop working on this class and
+ * log that we gave up.
+ */
+static void optimizeClass(ClassObject* clazz, const InlineSub* inlineSubs)
+{
+    bool allOkay = true;
+    int idx;
+
+    for (idx = 0; allOkay && idx < clazz->directMethodCount; idx++)
+        allOkay = optimizeMethod(&clazz->directMethods[idx], inlineSubs);
+
+    for (idx = 0; allOkay && idx < clazz->virtualMethodCount; idx++)
+        allOkay = optimizeMethod(&clazz->virtualMethods[idx], inlineSubs);
+
+    if (!allOkay) {
+        LOGV("DexOpt: ceasing optimization attempts on %s\n",
+            clazz->descriptor);
+    }
+}
+
+/*
+ * Optimize instructions in a method.
+ *
+ * Returns "true" if all went well, "false" if we bailed out early when
+ * something failed.
+ */
+/*
+ * Optimize instructions in a method: rewrite iget/iput to the "quick"
+ * byte-offset forms, virtual/super invokes to vtable-index forms, and
+ * some direct/static/virtual invokes to execute-inline or invoke-empty.
+ *
+ * Walks the instruction stream once, advancing by each instruction's
+ * width; switch and fill-array data payloads get special width handling.
+ *
+ * Returns "true" if all went well, "false" if we bailed out early when
+ * something failed.
+ */
+static bool optimizeMethod(Method* method, const InlineSub* inlineSubs)
+{
+    u4 insnsSize;
+    u2* insns;
+    u2 inst;
+
+    /* nothing to rewrite if there's no bytecode */
+    if (dvmIsNativeMethod(method) || dvmIsAbstractMethod(method))
+        return true;
+
+    insns = (u2*) method->insns;
+    assert(insns != NULL);
+    insnsSize = dvmGetMethodInsnsSize(method);
+
+    while (insnsSize > 0) {
+        int width;
+
+        /* opcode lives in the low byte of the first code unit */
+        inst = *insns & 0xff;
+
+        switch (inst) {
+        case OP_IGET:
+        case OP_IGET_BOOLEAN:
+        case OP_IGET_BYTE:
+        case OP_IGET_CHAR:
+        case OP_IGET_SHORT:
+            rewriteInstField(method, insns, OP_IGET_QUICK);
+            break;
+        case OP_IGET_WIDE:
+            rewriteInstField(method, insns, OP_IGET_WIDE_QUICK);
+            break;
+        case OP_IGET_OBJECT:
+            rewriteInstField(method, insns, OP_IGET_OBJECT_QUICK);
+            break;
+        case OP_IPUT:
+        case OP_IPUT_BOOLEAN:
+        case OP_IPUT_BYTE:
+        case OP_IPUT_CHAR:
+        case OP_IPUT_SHORT:
+            rewriteInstField(method, insns, OP_IPUT_QUICK);
+            break;
+        case OP_IPUT_WIDE:
+            rewriteInstField(method, insns, OP_IPUT_WIDE_QUICK);
+            break;
+        case OP_IPUT_OBJECT:
+            rewriteInstField(method, insns, OP_IPUT_OBJECT_QUICK);
+            break;
+
+        case OP_INVOKE_VIRTUAL:
+            /* try inline substitution first, fall back to vtable form */
+            if (!rewriteExecuteInline(method, insns, METHOD_VIRTUAL,inlineSubs))
+            {
+                if (!rewriteVirtualInvoke(method, insns, OP_INVOKE_VIRTUAL_QUICK))
+                    return false;
+            }
+            break;
+        case OP_INVOKE_VIRTUAL_RANGE:
+            if (!rewriteVirtualInvoke(method, insns, OP_INVOKE_VIRTUAL_QUICK_RANGE))
+                return false;
+            break;
+        case OP_INVOKE_SUPER:
+            if (!rewriteVirtualInvoke(method, insns, OP_INVOKE_SUPER_QUICK))
+                return false;
+            break;
+        case OP_INVOKE_SUPER_RANGE:
+            if (!rewriteVirtualInvoke(method, insns, OP_INVOKE_SUPER_QUICK_RANGE))
+                return false;
+            break;
+
+        case OP_INVOKE_DIRECT:
+            if (!rewriteExecuteInline(method, insns, METHOD_DIRECT, inlineSubs))
+            {
+                /* may mark empty Object.<init> calls as invoke-empty */
+                if (!rewriteDirectInvoke(method, insns))
+                    return false;
+            }
+            break;
+        case OP_INVOKE_STATIC:
+            rewriteExecuteInline(method, insns, METHOD_STATIC, inlineSubs);
+            break;
+
+        default:
+            // ignore this instruction
+            ;
+        }
+
+        /*
+         * Compute the width of this instruction.  Data payloads (packed
+         * switch, sparse switch, fill-array-data) are identified by their
+         * full 16-bit signature code unit and have variable sizes encoded
+         * in the subsequent code units; everything else uses the opcode
+         * width table.
+         */
+        if (*insns == kPackedSwitchSignature) {
+            /* ident + size + first_key(2) + size targets(2 units each) */
+            width = 4 + insns[1] * 2;
+        } else if (*insns == kSparseSwitchSignature) {
+            /* ident + size + size keys(2) + size targets(2) */
+            width = 2 + insns[1] * 4;
+        } else if (*insns == kArrayDataSignature) {
+            u2 elemWidth = insns[1];
+            u4 len = insns[2] | (((u4)insns[3]) << 16);
+            /* ident + width + len(2) + data, rounded up to code units */
+            width = 4 + (elemWidth * len + 1) / 2;
+        } else {
+            width = dexGetInstrWidth(gDvm.instrWidth, inst);
+        }
+        assert(width > 0);
+
+        insns += width;
+        insnsSize -= width;
+    }
+
+    assert(insnsSize == 0);
+    return true;
+}
+
+
+/*
+ * If "referrer" and "resClass" don't come from the same DEX file, and
+ * the DEX we're working on is not destined for the bootstrap class path,
+ * tweak the class loader so package-access checks work correctly.
+ *
+ * Only do this if we're doing pre-verification or optimization.
+ */
+/*
+ * If "referrer" and "resClass" don't come from the same DEX file, and
+ * the DEX we're working on is not destined for the bootstrap class path,
+ * tweak the class loader so package-access checks work correctly.
+ *
+ * Only do this if we're doing pre-verification or optimization.
+ */
+static void tweakLoader(ClassObject* referrer, ClassObject* resClass)
+{
+    if (!gDvm.optimizing)
+        return;
+    assert(referrer->classLoader == NULL);
+    assert(resClass->classLoader == NULL);
+
+    if (gDvm.optimizingBootstrapClass)
+        return;
+
+    /* class loader for an array class comes from element type */
+    if (dvmIsArrayClass(resClass))
+        resClass = resClass->elementClass;
+
+    /* plant a bogus loader pointer so the access check sees "different" */
+    if (resClass->pDvmDex != referrer->pDvmDex)
+        resClass->classLoader = (Object*) 0xdead3333;
+}
+
+/*
+ * Undo the effects of tweakLoader.
+ */
+/*
+ * Undo the effects of tweakLoader: restore the (bootstrap) NULL class
+ * loader on the resolved class, following the same array-element rule.
+ */
+static void untweakLoader(ClassObject* referrer, ClassObject* resClass)
+{
+    bool tweaked = gDvm.optimizing && !gDvm.optimizingBootstrapClass;
+
+    if (!tweaked)
+        return;
+
+    if (dvmIsArrayClass(resClass))
+        resClass = resClass->elementClass;
+    resClass->classLoader = NULL;
+}
+
+
+/*
+ * Alternate version of dvmResolveClass for use with verification and
+ * optimization. Performs access checks on every resolve, and refuses
+ * to acknowledge the existence of classes defined in more than one DEX
+ * file.
+ *
+ * Exceptions caused by failures are cleared before returning.
+ */
+/*
+ * Alternate version of dvmResolveClass for use with verification and
+ * optimization.  Performs access checks on every resolve, and refuses
+ * to acknowledge the existence of classes defined in more than one DEX
+ * file.
+ *
+ * "referrer" is the class doing the lookup; "classIdx" indexes the
+ * referrer's DEX type table.  Successful lookups are cached in the
+ * DvmDex resolved-class table.
+ *
+ * Returns NULL on failure.  Exceptions caused by failures are cleared
+ * before returning.
+ */
+ClassObject* dvmOptResolveClass(ClassObject* referrer, u4 classIdx)
+{
+    DvmDex* pDvmDex = referrer->pDvmDex;
+    ClassObject* resClass;
+
+    /*
+     * Check the table first.  If not there, do the lookup by name.
+     */
+    resClass = dvmDexGetResolvedClass(pDvmDex, classIdx);
+    if (resClass == NULL) {
+        const char* className = dexStringByTypeIdx(pDvmDex->pDexFile, classIdx);
+        if (className[0] != '\0' && className[1] == '\0') {
+            /* single-character descriptor: primitive type */
+            resClass = dvmFindPrimitiveClass(className[0]);
+        } else {
+            resClass = dvmFindClassNoInit(className, referrer->classLoader);
+        }
+        if (resClass == NULL) {
+            /* not found; clear the exception the lookup raised */
+            LOGV("DexOpt: class %d (%s) not found\n",
+                classIdx,
+                dexStringByTypeIdx(pDvmDex->pDexFile, classIdx));
+            dvmClearOptException(dvmThreadSelf());
+            return NULL;
+        }
+
+        /*
+         * Add it to the resolved table so we're faster on the next lookup.
+         */
+        dvmDexSetResolvedClass(pDvmDex, classIdx, resClass);
+    }
+
+    /* multiple definitions?  (flag set by loadAllClasses) */
+    if (IS_CLASS_FLAG_SET(resClass, CLASS_MULTIPLE_DEFS)) {
+        LOGI("DexOpt: not resolving ambiguous class '%s'\n",
+            resClass->descriptor);
+        return NULL;
+    }
+
+    /* access allowed?  (tweak/untweak fakes cross-DEX loader identity) */
+    tweakLoader(referrer, resClass);
+    bool allowed = dvmCheckClassAccess(referrer, resClass);
+    untweakLoader(referrer, resClass);
+    if (!allowed) {
+        LOGW("DexOpt: resolve class illegal access: %s -> %s\n",
+            referrer->descriptor, resClass->descriptor);
+        return NULL;
+    }
+
+    return resClass;
+}
+
+/*
+ * Alternate version of dvmResolveInstField().
+ */
+/*
+ * Alternate version of dvmResolveInstField() for dexopt: resolves an
+ * instance field reference, caching the result, and performs an access
+ * check on every call.
+ *
+ * Returns NULL (without a pending exception) on failure.
+ */
+InstField* dvmOptResolveInstField(ClassObject* referrer, u4 ifieldIdx)
+{
+    DvmDex* pDvmDex = referrer->pDvmDex;
+    InstField* resField;
+
+    resField = (InstField*) dvmDexGetResolvedField(pDvmDex, ifieldIdx);
+    if (resField == NULL) {
+        const DexFieldId* pFieldId;
+        ClassObject* resClass;
+
+        pFieldId = dexGetFieldId(pDvmDex->pDexFile, ifieldIdx);
+
+        /*
+         * Find the field's class.
+         */
+        resClass = dvmOptResolveClass(referrer, pFieldId->classIdx);
+        if (resClass == NULL) {
+            /* dvmOptResolveClass already cleared any exception */
+            //dvmClearOptException(dvmThreadSelf());
+            assert(!dvmCheckException(dvmThreadSelf()));
+            return NULL;
+        }
+
+        /* search the class and its superclasses by name and type */
+        resField = dvmFindInstanceFieldHier(resClass,
+            dexStringById(pDvmDex->pDexFile, pFieldId->nameIdx),
+            dexStringByTypeIdx(pDvmDex->pDexFile, pFieldId->typeIdx));
+        if (resField == NULL) {
+            LOGD("DexOpt: couldn't find field %s.%s\n",
+                resClass->descriptor,
+                dexStringById(pDvmDex->pDexFile, pFieldId->nameIdx));
+            return NULL;
+        }
+
+        /*
+         * Add it to the resolved table so we're faster on the next lookup.
+         */
+        dvmDexSetResolvedField(pDvmDex, ifieldIdx, (Field*) resField);
+    }
+
+    /* access allowed?  (tweak/untweak fakes cross-DEX loader identity) */
+    tweakLoader(referrer, resField->field.clazz);
+    bool allowed = dvmCheckFieldAccess(referrer, (Field*)resField);
+    untweakLoader(referrer, resField->field.clazz);
+    if (!allowed) {
+        LOGI("DexOpt: access denied from %s to field %s.%s\n",
+            referrer->descriptor, resField->field.clazz->descriptor,
+            resField->field.name);
+        return NULL;
+    }
+
+    return resField;
+}
+
+/*
+ * Alternate version of dvmResolveStaticField().
+ *
+ * Does not force initialization of the resolved field's class.
+ */
+/*
+ * Alternate version of dvmResolveStaticField() for dexopt.
+ *
+ * Does not force initialization of the resolved field's class, and
+ * performs an access check on every call.
+ *
+ * Returns NULL (without a pending exception) on failure.
+ */
+StaticField* dvmOptResolveStaticField(ClassObject* referrer, u4 sfieldIdx)
+{
+    DvmDex* pDvmDex = referrer->pDvmDex;
+    StaticField* resField;
+
+    resField = (StaticField*)dvmDexGetResolvedField(pDvmDex, sfieldIdx);
+    if (resField == NULL) {
+        const DexFieldId* pFieldId;
+        ClassObject* resClass;
+
+        pFieldId = dexGetFieldId(pDvmDex->pDexFile, sfieldIdx);
+
+        /*
+         * Find the field's class.
+         */
+        resClass = dvmOptResolveClass(referrer, pFieldId->classIdx);
+        if (resClass == NULL) {
+            /* dvmOptResolveClass already cleared any exception */
+            //dvmClearOptException(dvmThreadSelf());
+            assert(!dvmCheckException(dvmThreadSelf()));
+            return NULL;
+        }
+
+        /* search the class and its superclasses by name and type */
+        resField = dvmFindStaticFieldHier(resClass,
+            dexStringById(pDvmDex->pDexFile, pFieldId->nameIdx),
+            dexStringByTypeIdx(pDvmDex->pDexFile, pFieldId->typeIdx));
+        if (resField == NULL) {
+            LOGD("DexOpt: couldn't find static field\n");
+            return NULL;
+        }
+
+        /*
+         * Add it to the resolved table so we're faster on the next lookup.
+         *
+         * We can only do this if we're in "dexopt", because the presence
+         * of a valid value in the resolution table implies that the class
+         * containing the static field has been initialized.
+         */
+        if (gDvm.optimizing)
+            dvmDexSetResolvedField(pDvmDex, sfieldIdx, (Field*) resField);
+    }
+
+    /* access allowed?  (tweak/untweak fakes cross-DEX loader identity) */
+    tweakLoader(referrer, resField->field.clazz);
+    bool allowed = dvmCheckFieldAccess(referrer, (Field*)resField);
+    untweakLoader(referrer, resField->field.clazz);
+    if (!allowed) {
+        LOGI("DexOpt: access denied from %s to field %s.%s\n",
+            referrer->descriptor, resField->field.clazz->descriptor,
+            resField->field.name);
+        return NULL;
+    }
+
+    return resField;
+}
+
+
+/*
+ * Rewrite an iget/iput instruction. These all have the form:
+ * op vA, vB, field@CCCC
+ *
+ * Where vA holds the value, vB holds the object reference, and CCCC is
+ * the field reference constant pool offset. We want to replace CCCC
+ * with the byte offset from the start of the object.
+ *
+ * "clazz" is the referring class. We need this because we verify
+ * access rights here.
+ */
+/*
+ * Rewrite an iget/iput instruction.  These all have the form:
+ *   op vA, vB, field@CCCC
+ *
+ * Where vA holds the value, vB holds the object reference, and CCCC is
+ * the field reference constant pool offset.  We want to replace CCCC
+ * with the byte offset from the start of the object.
+ *
+ * The referring class comes from "method"; access rights are verified
+ * during resolution.  Failure to resolve simply leaves the instruction
+ * untouched.
+ */
+static void rewriteInstField(Method* method, u2* insns, OpCode newOpc)
+{
+    ClassObject* clazz = method->clazz;
+    u2 fieldIdx = insns[1];
+    InstField* instField = dvmOptResolveInstField(clazz, fieldIdx);
+
+    if (instField == NULL) {
+        LOGI("DexOpt: unable to optimize field ref 0x%04x at 0x%02x in %s.%s\n",
+            fieldIdx, (int) (insns - method->insns), clazz->descriptor,
+            method->name);
+        return;
+    }
+
+    /* the quick form only has 16 bits for the byte offset */
+    if (instField->byteOffset >= 65536) {
+        LOGI("DexOpt: field offset exceeds 64K (%d)\n", instField->byteOffset);
+        return;
+    }
+
+    /* swap in the new opcode (low byte), then the byte offset */
+    insns[0] = (insns[0] & 0xff00) | (u2) newOpc;
+    insns[1] = (u2) instField->byteOffset;
+    LOGVV("DexOpt: rewrote access to %s.%s --> %d\n",
+        instField->field.clazz->descriptor, instField->field.name,
+        instField->byteOffset);
+}
+
+/*
+ * Alternate version of dvmResolveMethod().
+ *
+ * Doesn't throw exceptions, and checks access on every lookup.
+ */
+/*
+ * Alternate version of dvmResolveMethod() for dexopt.
+ *
+ * Doesn't throw exceptions, and checks access on every lookup.
+ *
+ * "methodType" selects the search strategy (direct, static, or virtual);
+ * interface methods go through dvmOptResolveInterfaceMethod() instead.
+ * Returns NULL on failure.
+ */
+Method* dvmOptResolveMethod(ClassObject* referrer, u4 methodIdx,
+    MethodType methodType)
+{
+    DvmDex* pDvmDex = referrer->pDvmDex;
+    Method* resMethod;
+
+    assert(methodType != METHOD_INTERFACE);
+
+    LOGVV("--- resolving method %u (referrer=%s)\n", methodIdx,
+        referrer->descriptor);
+
+    resMethod = dvmDexGetResolvedMethod(pDvmDex, methodIdx);
+    if (resMethod == NULL) {
+        const DexMethodId* pMethodId;
+        ClassObject* resClass;
+
+        pMethodId = dexGetMethodId(pDvmDex->pDexFile, methodIdx);
+
+        resClass = dvmOptResolveClass(referrer, pMethodId->classIdx);
+        if (resClass == NULL) {
+            /* can't find the class that the method is a part of */
+            LOGV("DexOpt: can't find called method's class (?.%s)\n",
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx));
+            return NULL;
+        }
+        if (dvmIsInterfaceClass(resClass)) {
+            /* method is part of an interface; this is wrong method for that */
+            LOGW("DexOpt: method is in an interface\n");
+            return NULL;
+        }
+
+        /*
+         * We need to chase up the class hierarchy to find methods defined
+         * in super-classes.  (We only want to check the current class
+         * if we're looking for a constructor.)
+         */
+        DexProto proto;
+        dexProtoSetFromMethodId(&proto, pDvmDex->pDexFile, pMethodId);
+
+        if (methodType == METHOD_DIRECT) {
+            /* constructors and private methods: current class only */
+            resMethod = dvmFindDirectMethod(resClass,
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx), &proto);
+        } else if (methodType == METHOD_STATIC) {
+            resMethod = dvmFindDirectMethodHier(resClass,
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx), &proto);
+        } else {
+            resMethod = dvmFindVirtualMethodHier(resClass,
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx), &proto);
+        }
+
+        if (resMethod == NULL) {
+            LOGV("DexOpt: couldn't find method '%s'\n",
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx));
+            return NULL;
+        }
+
+        /* see if this is a pure-abstract method */
+        if (dvmIsAbstractMethod(resMethod) && !dvmIsAbstractClass(resClass)) {
+            LOGW("DexOpt: pure-abstract method '%s' in %s\n",
+                dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx),
+                resClass->descriptor);
+            return NULL;
+        }
+
+        /*
+         * Add it to the resolved table so we're faster on the next lookup.
+         *
+         * For static methods we can only do this when we're running
+         * inside "dexopt" (gDvm.optimizing), because the presence of a
+         * valid value in the resolution table implies that the class
+         * containing the static method has been initialized.
+         * (NOTE(review): the original comment here said the opposite of
+         * what the condition does; the static-field resolver documents
+         * the same rule.)
+         */
+        if (methodType != METHOD_STATIC || gDvm.optimizing)
+            dvmDexSetResolvedMethod(pDvmDex, methodIdx, resMethod);
+    }
+
+    LOGVV("--- found method %d (%s.%s)\n",
+        methodIdx, resMethod->clazz->descriptor, resMethod->name);
+
+    /* access allowed?  (tweak/untweak fakes cross-DEX loader identity) */
+    tweakLoader(referrer, resMethod->clazz);
+    bool allowed = dvmCheckMethodAccess(referrer, resMethod);
+    untweakLoader(referrer, resMethod->clazz);
+    if (!allowed) {
+        IF_LOGI() {
+            char* desc = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+            LOGI("DexOpt: illegal method access (call %s.%s %s from %s)\n",
+                resMethod->clazz->descriptor, resMethod->name, desc,
+                referrer->descriptor);
+            free(desc);
+        }
+        return NULL;
+    }
+
+    return resMethod;
+}
+
+/*
+ * Rewrite invoke-virtual, invoke-virtual/range, invoke-super, and
+ * invoke-super/range. These all have the form:
+ * op vAA, meth@BBBB, reg stuff @CCCC
+ *
+ * We want to replace the method constant pool index BBBB with the
+ * vtable index.
+ */
+/*
+ * Rewrite invoke-virtual, invoke-virtual/range, invoke-super, and
+ * invoke-super/range.  These all have the form:
+ *   op vAA, meth@BBBB, reg stuff @CCCC
+ *
+ * We want to replace the method constant pool index BBBB with the
+ * vtable index.
+ *
+ * Returns "false" if the target method could not be resolved.
+ */
+static bool rewriteVirtualInvoke(Method* method, u2* insns, OpCode newOpc)
+{
+    ClassObject* clazz = method->clazz;
+    u2 methodIdx = insns[1];
+    Method* baseMethod = dvmOptResolveMethod(clazz, methodIdx, METHOD_VIRTUAL);
+
+    if (baseMethod == NULL) {
+        LOGD("DexOpt: unable to optimize virt call 0x%04x at 0x%02x in %s.%s\n",
+            methodIdx,
+            (int) (insns - method->insns), clazz->descriptor,
+            method->name);
+        return false;
+    }
+
+    assert((insns[0] & 0xff) == OP_INVOKE_VIRTUAL ||
+           (insns[0] & 0xff) == OP_INVOKE_VIRTUAL_RANGE ||
+           (insns[0] & 0xff) == OP_INVOKE_SUPER ||
+           (insns[0] & 0xff) == OP_INVOKE_SUPER_RANGE);
+
+    /*
+     * Swap in the new opcode (low byte) and the vtable index.  Note:
+     * Method->methodIndex is a u2 and is range checked during the
+     * initial load, so the store cannot truncate.
+     */
+    insns[0] = (insns[0] & 0xff00) | (u2) newOpc;
+    insns[1] = baseMethod->methodIndex;
+
+    return true;
+}
+
+/*
+ * Rewrite invoke-direct, which has the form:
+ * op vAA, meth@BBBB, reg stuff @CCCC
+ *
+ * There isn't a lot we can do to make this faster, but in some situations
+ * we can make it go away entirely.
+ *
+ * This must only be used when the invoked method does nothing and has
+ * no return value (the latter being very important for verification).
+ */
+/*
+ * Rewrite invoke-direct, which has the form:
+ *   op vAA, meth@BBBB, reg stuff @CCCC
+ *
+ * There isn't a lot we can do to make this faster, but in some situations
+ * we can make it go away entirely: a call to the empty java.lang.Object
+ * constructor is replaced with an "empty" marker instruction.
+ *
+ * This must only be used when the invoked method does nothing and has
+ * no return value (the latter being very important for verification).
+ *
+ * Returns "false" if the target method could not be resolved.
+ */
+static bool rewriteDirectInvoke(Method* method, u2* insns)
+{
+    ClassObject* clazz = method->clazz;
+    u2 methodIdx = insns[1];
+    Method* calledMethod = dvmOptResolveMethod(clazz, methodIdx,
+            METHOD_DIRECT);
+
+    if (calledMethod == NULL) {
+        LOGD("DexOpt: unable to opt direct call 0x%04x at 0x%02x in %s.%s\n",
+            methodIdx,
+            (int) (insns - method->insns), clazz->descriptor,
+            method->name);
+        return false;
+    }
+
+    /* TODO: verify that java.lang.Object() is actually empty! */
+    bool isObjectInit =
+        calledMethod->clazz == gDvm.classJavaLangObject &&
+        dvmCompareNameDescriptorAndMethod("<init>", "()V", calledMethod) == 0;
+
+    if (isObjectInit) {
+        /*
+         * Replace with "empty" instruction.  DO NOT disturb anything
+         * else about it, as we want it to function the same as
+         * OP_INVOKE_DIRECT when debugging is enabled.
+         */
+        assert((insns[0] & 0xff) == OP_INVOKE_DIRECT);
+        insns[0] = (insns[0] & 0xff00) | (u2) OP_INVOKE_DIRECT_EMPTY;
+    }
+
+    return true;
+}
+
+/*
+ * Resolve an interface method reference.
+ *
+ * Returns NULL if the method was not found. Does not throw an exception.
+ */
+/*
+ * Resolve an interface method reference.
+ *
+ * Searches the interface class itself, then all entries in its iftable
+ * (superinterfaces and superclass interfaces).  Successful lookups are
+ * cached in the DvmDex resolved-method table.
+ *
+ * Returns NULL if the method was not found.  Does not throw an exception.
+ */
+Method* dvmOptResolveInterfaceMethod(ClassObject* referrer, u4 methodIdx)
+{
+    DvmDex* pDvmDex = referrer->pDvmDex;
+    Method* resMethod;
+    int i;
+
+    LOGVV("--- resolving interface method %d (referrer=%s)\n",
+        methodIdx, referrer->descriptor);
+
+    resMethod = dvmDexGetResolvedMethod(pDvmDex, methodIdx);
+    if (resMethod == NULL) {
+        const DexMethodId* pMethodId;
+        ClassObject* resClass;
+
+        pMethodId = dexGetMethodId(pDvmDex->pDexFile, methodIdx);
+
+        resClass = dvmOptResolveClass(referrer, pMethodId->classIdx);
+        if (resClass == NULL) {
+            /* can't find the class that the method is a part of */
+            dvmClearOptException(dvmThreadSelf());
+            return NULL;
+        }
+        if (!dvmIsInterfaceClass(resClass)) {
+            /* whoops */
+            LOGI("Interface method not part of interface class\n");
+            return NULL;
+        }
+
+        const char* methodName =
+            dexStringById(pDvmDex->pDexFile, pMethodId->nameIdx);
+        DexProto proto;
+        dexProtoSetFromMethodId(&proto, pDvmDex->pDexFile, pMethodId);
+
+        /*
+         * Fix: the original logged an undefined "methodSig" identifier
+         * here, which failed to compile whenever LOGVV was enabled.
+         */
+        LOGVV("+++ looking for '%s' in resClass='%s'\n",
+            methodName, resClass->descriptor);
+        resMethod = dvmFindVirtualMethod(resClass, methodName, &proto);
+        if (resMethod == NULL) {
+            /* scan superinterfaces and superclass interfaces */
+            LOGVV("+++ did not resolve immediately\n");
+            for (i = 0; i < resClass->iftableCount; i++) {
+                resMethod = dvmFindVirtualMethod(resClass->iftable[i].clazz,
+                                methodName, &proto);
+                if (resMethod != NULL)
+                    break;
+            }
+
+            if (resMethod == NULL) {
+                LOGVV("+++ unable to resolve method %s\n", methodName);
+                return NULL;
+            }
+        } else {
+            LOGVV("+++ resolved immediately: %s (%s %d)\n", resMethod->name,
+                resMethod->clazz->descriptor, (u4) resMethod->methodIndex);
+        }
+
+        /* we're expecting this to be abstract */
+        if (!dvmIsAbstractMethod(resMethod)) {
+            char* desc = dexProtoCopyMethodDescriptor(&resMethod->prototype);
+            LOGW("Found non-abstract interface method %s.%s %s\n",
+                resMethod->clazz->descriptor, resMethod->name, desc);
+            free(desc);
+            return NULL;
+        }
+
+        /*
+         * Add it to the resolved table so we're faster on the next lookup.
+         */
+        dvmDexSetResolvedMethod(pDvmDex, methodIdx, resMethod);
+    }
+
+    LOGVV("--- found interface method %d (%s.%s)\n",
+        methodIdx, resMethod->clazz->descriptor, resMethod->name);
+
+    /* interface methods are always public; no need to check access */
+
+    return resMethod;
+}
+/*
+ * See if the method being called can be rewritten as an inline operation.
+ * Works for invoke-virtual, invoke-direct, and invoke-static.
+ *
+ * Returns "true" if we replace it.
+ */
+/*
+ * See if the method being called can be rewritten as an inline operation.
+ * Works for invoke-virtual, invoke-direct, and invoke-static.
+ *
+ * Returns "true" if we replace it.
+ */
+static bool rewriteExecuteInline(Method* method, u2* insns,
+    MethodType methodType, const InlineSub* inlineSubs)
+{
+    ClassObject* clazz = method->clazz;
+    u2 methodIdx = insns[1];
+    Method* calledMethod;
+    const InlineSub* sub;
+
+    calledMethod = dvmOptResolveMethod(clazz, methodIdx, methodType);
+    if (calledMethod == NULL) {
+        LOGV("+++ DexOpt inline: can't find %d\n", methodIdx);
+        return false;
+    }
+
+    /* linear scan; the substitution table is short and NULL-terminated */
+    for (sub = inlineSubs; sub->method != NULL; sub++) {
+        if (sub->method != calledMethod)
+            continue;
+
+        assert((insns[0] & 0xff) == OP_INVOKE_DIRECT ||
+               (insns[0] & 0xff) == OP_INVOKE_STATIC ||
+               (insns[0] & 0xff) == OP_INVOKE_VIRTUAL);
+
+        /* swap in execute-inline (low byte) and the inline-table index */
+        insns[0] = (insns[0] & 0xff00) | (u2) OP_EXECUTE_INLINE;
+        insns[1] = (u2) sub->inlineIdx;
+        return true;
+    }
+
+    return false;
+}
+
diff --git a/vm/analysis/DexOptimize.h b/vm/analysis/DexOptimize.h
new file mode 100644
index 0000000..01aa828
--- /dev/null
+++ b/vm/analysis/DexOptimize.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * DEX optimization declarations.
+ */
+#ifndef _DALVIK_DEXOPTIMIZE
+#define _DALVIK_DEXOPTIMIZE
+
+/*
+ * Global DEX optimizer control. Determines the circumstances in which we
+ * try to rewrite instructions in the DEX file.
+ */
+typedef enum DexOptimizerMode {
+ OPTIMIZE_MODE_UNKNOWN = 0,
+ OPTIMIZE_MODE_NONE, /* never optimize */
+ OPTIMIZE_MODE_VERIFIED, /* only optimize verified classes (default) */
+ OPTIMIZE_MODE_ALL /* optimize all classes */
+} DexOptimizerMode;
+
+/* some additional bit flags for dexopt */
+enum DexoptFlags {
+ DEXOPT_GEN_REGISTER_MAPS = 1, /* generate register maps during verify */
+};
+
+/*
+ * Given the full path to a DEX or Jar file, and (if appropriate) the name
+ * within the Jar, open the optimized version from the cache.
+ *
+ * If "*pNewFile" is set, a new file has been created with only a stub
+ * "opt" header, and the caller is expected to fill in the blanks.
+ *
+ * Returns the file descriptor, locked and seeked past the "opt" header.
+ */
+int dvmOpenCachedDexFile(const char* fileName, const char* cachedFile,
+ u4 modWhen, u4 crc, bool isBootstrap, bool* pNewFile, bool createIfMissing);
+
+/*
+ * Unlock the specified file descriptor. Use in conjunction with
+ * dvmOpenCachedDexFile().
+ *
+ * Returns true on success.
+ */
+bool dvmUnlockCachedDexFile(int fd);
+
+/*
+ * Verify the contents of the "opt" header, and check the DEX file's
+ * dependencies on its source zip (if available).
+ */
+bool dvmCheckOptHeaderAndDependencies(int fd, bool sourceAvail, u4 modWhen,
+ u4 crc, bool expectVerify, bool expectOpt);
+
+/*
+ * Optimize a DEX file. The file must start with the "opt" header, followed
+ * by the plain DEX data. It must be mmap()able.
+ *
+ * "fileName" is only used for debug output.
+ */
+bool dvmOptimizeDexFile(int fd, off_t dexOffset, long dexLen,
+ const char* fileName, u4 modWhen, u4 crc, bool isBootstrap);
+
+/*
+ * Continue the optimization process on the other side of a fork/exec.
+ */
+bool dvmContinueOptimization(int fd, off_t dexOffset, long dexLength,
+ const char* fileName, u4 modWhen, u4 crc, bool isBootstrap);
+
+/*
+ * Abbreviated resolution functions, for use by optimization and verification
+ * code.
+ */
+ClassObject* dvmOptResolveClass(ClassObject* referrer, u4 classIdx);
+Method* dvmOptResolveMethod(ClassObject* referrer, u4 methodIdx,
+ MethodType methodType);
+Method* dvmOptResolveInterfaceMethod(ClassObject* referrer, u4 methodIdx);
+InstField* dvmOptResolveInstField(ClassObject* referrer, u4 ifieldIdx);
+StaticField* dvmOptResolveStaticField(ClassObject* referrer, u4 sfieldIdx);
+
+#endif /*_DALVIK_DEXOPTIMIZE*/
diff --git a/vm/analysis/DexVerify.c b/vm/analysis/DexVerify.c
new file mode 100644
index 0000000..354d68f
--- /dev/null
+++ b/vm/analysis/DexVerify.c
@@ -0,0 +1,713 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik classfile verification. This file contains the verifier entry
+ * points and the static constraint checks.
+ */
+#include "Dalvik.h"
+#include "analysis/CodeVerify.h"
+
+
+/* fwd */
+static bool verifyMethod(Method* meth, int verifyFlags);
+static bool verifyInstructions(const Method* meth, InsnFlags* insnFlags,
+ int verifyFlags);
+
+
+/*
+ * Initialize some things we need for verification.
+ *
+ * Allocates the instruction width, format, and flags tables that the
+ * verifier consults for every instruction.  Returns "true" on success.
+ */
+bool dvmVerificationStartup(void)
+{
+    gDvm.instrWidth = dexCreateInstrWidthTable();
+    gDvm.instrFormat = dexCreateInstrFormatTable();
+    gDvm.instrFlags = dexCreateInstrFlagsTable();
+    /*
+     * All three tables are required; instrFlags is dereferenced later
+     * (e.g. dexGetInstrFlags in verifyInstructions), so its allocation
+     * must be checked here as well.  The original check omitted it.
+     */
+    return (gDvm.instrWidth != NULL && gDvm.instrFormat != NULL &&
+            gDvm.instrFlags != NULL);
+}
+
+/*
+ * Free the instruction tables allocated by dvmVerificationStartup().
+ * Safe to call even if startup failed partway; free(NULL) is a no-op.
+ */
+void dvmVerificationShutdown(void)
+{
+    free(gDvm.instrWidth);
+    free(gDvm.instrFormat);
+    free(gDvm.instrFlags);
+}
+
+/*
+ * Induce verification on all classes loaded from this DEX file as part
+ * of pre-verification and optimization. This is never called from a
+ * normally running VM.
+ *
+ * Returns "true" when all classes have been processed.  Note this is not
+ * "all classes passed": individual verification failures are logged and
+ * skipped so the rest of the file can still be processed.
+ */
+bool dvmVerifyAllClasses(DexFile* pDexFile)
+{
+    u4 count = pDexFile->pHeader->classDefsSize;
+    u4 idx;
+
+    /* this path is only reached from the dexopt side */
+    assert(gDvm.optimizing);
+
+    if (gDvm.classVerifyMode == VERIFY_MODE_NONE) {
+        LOGV("+++ verification is disabled, skipping all classes\n");
+        return true;
+    }
+    if (gDvm.classVerifyMode == VERIFY_MODE_REMOTE &&
+        gDvm.optimizingBootstrapClass)
+    {
+        LOGV("+++ verification disabled for bootstrap classes\n");
+        return true;
+    }
+
+    for (idx = 0; idx < count; idx++) {
+        const DexClassDef* pClassDef;
+        const char* classDescriptor;
+        ClassObject* clazz;
+
+        pClassDef = dexGetClassDef(pDexFile, idx);
+        classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+        /* all classes are loaded into the bootstrap class loader */
+        clazz = dvmLookupClass(classDescriptor, NULL, false);
+        if (clazz != NULL) {
+            /*
+             * If the loaded class came from a different DEX file, this
+             * descriptor is defined more than once; don't mark it verified
+             * based on the wrong definition.
+             */
+            if (clazz->pDvmDex->pDexFile != pDexFile) {
+                LOGD("DexOpt: not verifying '%s': multiple definitions\n",
+                    classDescriptor);
+            } else {
+                if (dvmVerifyClass(clazz, VERIFY_DEFAULT)) {
+                    assert((clazz->accessFlags & JAVA_FLAGS_MASK) ==
+                        pClassDef->accessFlags);
+                    /*
+                     * Deliberately cast away const: we are writing the
+                     * "preverified" flag back into the mapped DEX so the
+                     * runtime can skip re-verification.
+                     */
+                    ((DexClassDef*)pClassDef)->accessFlags |=
+                        CLASS_ISPREVERIFIED;
+                }
+                /* keep going even if one fails */
+            }
+        } else {
+            LOGV("DexOpt: +++ not verifying '%s'\n", classDescriptor);
+        }
+    }
+
+    return true;
+}
+
+/*
+ * Verify a class.
+ *
+ * By the time we get here, the value of gDvm.classVerifyMode should already
+ * have been factored in. If you want to call into the verifier even
+ * though verification is disabled, that's your business.
+ *
+ * Returns "true" on success.
+ */
+bool dvmVerifyClass(ClassObject* clazz, int verifyFlags)
+{
+    int idx;
+
+    if (dvmIsClassVerified(clazz)) {
+        LOGD("Ignoring duplicate verify attempt on %s\n", clazz->descriptor);
+        return true;
+    }
+
+    //LOGI("Verify1 '%s'\n", clazz->descriptor);
+
+    // TODO - verify class structure in DEX?
+
+    /* direct methods first, then virtuals; reject the class on the
+     * first method that fails */
+    for (idx = 0; idx < clazz->directMethodCount; idx++) {
+        if (verifyMethod(&clazz->directMethods[idx], verifyFlags))
+            continue;
+        LOG_VFY("Verifier rejected class %s\n", clazz->descriptor);
+        return false;
+    }
+    for (idx = 0; idx < clazz->virtualMethodCount; idx++) {
+        if (verifyMethod(&clazz->virtualMethods[idx], verifyFlags))
+            continue;
+        LOG_VFY("Verifier rejected class %s\n", clazz->descriptor);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform verification on a single method.
+ *
+ * We do this in three passes:
+ *  (1) Walk through all code units, determining instruction lengths.
+ *  (2) Do static checks, including branch target and operand validation.
+ *  (3) Do structural checks, including data-flow analysis.
+ *
+ * Some checks may be bypassed depending on the verification mode. We can't
+ * turn this stuff off completely if we want to do "exact" GC.
+ *
+ * - operands of getfield, putfield, getstatic, putstatic must be valid
+ * - operands of method invocation instructions must be valid
+ *
+ * - code array must not be empty
+ * - (N/A) code_length must be less than 65536
+ * - opcode of first instruction begins at index 0
+ * - only documented instructions may appear
+ * - each instruction follows the last
+ * - (below) last byte of last instruction is at (code_length-1)
+ *
+ * Returns "true" if the method passes verification.
+ */
+static bool verifyMethod(Method* meth, int verifyFlags)
+{
+    bool result = false;
+    UninitInstanceMap* uninitMap = NULL;
+    InsnFlags* insnFlags = NULL;
+    int newInstanceCount;       /* removed unused local "i" */
+
+    /*
+     * If there aren't any instructions, make sure that's expected, then
+     * exit successfully. Note: meth->insns gets set to a native function
+     * pointer on first call.
+     */
+    if (dvmGetMethodInsnsSize(meth) == 0) {
+        if (!dvmIsNativeMethod(meth) && !dvmIsAbstractMethod(meth)) {
+            LOG_VFY_METH(meth,
+                "VFY: zero-length code in concrete non-native method\n");
+            goto bail;
+        }
+
+        goto success;
+    }
+
+    /*
+     * Sanity-check the register counts. ins + locals = registers, so make
+     * sure that ins <= registers.
+     */
+    if (meth->insSize > meth->registersSize) {
+        LOG_VFY_METH(meth, "VFY: bad register counts (ins=%d regs=%d)\n",
+            meth->insSize, meth->registersSize);
+        goto bail;
+    }
+
+    /*
+     * Allocate and populate an array to hold instruction data.
+     *
+     * TODO: Consider keeping a reusable pre-allocated array sitting
+     * around for smaller methods.
+     */
+    insnFlags = (InsnFlags*)
+        calloc(dvmGetMethodInsnsSize(meth), sizeof(InsnFlags));
+    if (insnFlags == NULL)
+        goto bail;
+
+    /*
+     * Compute the width of each instruction and store the result in insnFlags.
+     * Count up the #of occurrences of new-instance instructions while we're
+     * at it.
+     */
+    if (!dvmComputeCodeWidths(meth, insnFlags, &newInstanceCount))
+        goto bail;
+
+    /*
+     * Allocate a map to hold the classes of uninitialized instances.
+     */
+    uninitMap = dvmCreateUninitInstanceMap(meth, insnFlags, newInstanceCount);
+    if (uninitMap == NULL)
+        goto bail;
+
+    /*
+     * Set the "in try" flags for all instructions guarded by a "try" block.
+     */
+    if (!dvmSetTryFlags(meth, insnFlags))
+        goto bail;
+
+    /*
+     * Perform static instruction verification.
+     */
+    if (!verifyInstructions(meth, insnFlags, verifyFlags))
+        goto bail;
+
+    /*
+     * Do code-flow analysis. Do this after verifying the branch targets
+     * so we don't need to worry about it here.
+     *
+     * If there are no registers, we don't need to do much in the way of
+     * analysis, but we still need to verify that nothing actually tries
+     * to use a register.
+     */
+    if (!dvmVerifyCodeFlow(meth, insnFlags, uninitMap)) {
+        //LOGD("+++ %s failed code flow\n", meth->name);
+        goto bail;
+    }
+
+success:
+    result = true;
+
+bail:
+    /* goto-based cleanup: both helpers tolerate NULL arguments */
+    dvmFreeUninitInstanceMap(uninitMap);
+    free(insnFlags);
+    return result;
+}
+
+
+/*
+ * Verify an array data table. "curOffset" is the offset of the fill-array-data
+ * instruction.
+ *
+ * Validates that the branch-style offset embedded in the instruction lands
+ * on a properly aligned payload table that fits entirely inside the
+ * method's code area.  Returns "true" if the table is well-formed.
+ */
+static bool checkArrayData(const Method* meth, int curOffset)
+{
+    const int insnCount = dvmGetMethodInsnsSize(meth);
+    const u2* insns = meth->insns + curOffset;
+    const u2* arrayData;
+    int valueCount, valueWidth, tableSize;
+    int offsetToArrayData;
+
+    assert(curOffset >= 0 && curOffset < insnCount);
+
+    /* make sure the start of the array data table is in range */
+    /* offset is a signed 32-bit value split across two code units */
+    offsetToArrayData = insns[1] | (((s4)insns[2]) << 16);
+    if (curOffset + offsetToArrayData < 0 ||
+        curOffset + offsetToArrayData + 2 >= insnCount)
+    {
+        LOG_VFY_METH(meth,
+            "VFY: invalid array data start: at %d, data offset %d, count %d\n",
+            curOffset, offsetToArrayData, insnCount);
+        return false;
+    }
+
+    /* offset to array data table is a relative branch-style offset */
+    arrayData = insns + offsetToArrayData;
+
+    /* make sure the table is 32-bit aligned */
+    /* NOTE(review): truncating a pointer to u4 assumes a 32-bit target;
+     * the low bits are all that matter for the alignment test */
+    if ((((u4) arrayData) & 0x03) != 0) {
+        LOG_VFY_METH(meth,
+            "VFY: unaligned array data table: at %d, data offset %d\n",
+            curOffset, offsetToArrayData);
+        return false;
+    }
+
+    /* payload layout: arrayData[1] = element width in bytes,
+     * arrayData[2..3] = 32-bit element count (alignment checked above) */
+    valueWidth = arrayData[1];
+    valueCount = *(u4*)(&arrayData[2]);
+
+    /* table size in 16-bit code units: 4 header units + data, rounded up */
+    tableSize = 4 + (valueWidth * valueCount + 1) / 2;
+
+    /* make sure the end of the switch is in range */
+    if (curOffset + offsetToArrayData + tableSize > insnCount) {
+        LOG_VFY_METH(meth,
+            "VFY: invalid array data end: at %d, data offset %d, end %d, "
+            "count %d\n",
+            curOffset, offsetToArrayData,
+            curOffset + offsetToArrayData + tableSize,
+            insnCount);
+        return false;
+    }
+
+    return true;
+}
+
+
+/*
+ * Decode the instruction at code-unit offset "insnIdx" into "pDecInsn".
+ * Thin wrapper around dexDecodeInstruction() that supplies the global
+ * instruction-format table.
+ */
+static void decodeInstruction(const Method* meth, int insnIdx,
+    DecodedInstruction* pDecInsn)
+{
+    dexDecodeInstruction(gDvm.instrFormat, meth->insns + insnIdx, pDecInsn);
+}
+
+
+/*
+ * Perform static checks on a "new-instance" instruction. Specifically,
+ * make sure the class reference isn't for an array class.
+ *
+ * We don't need the actual class, just a pointer to the class name.
+ */
+static bool checkNewInstance(const Method* meth, int insnIdx)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    const char* descriptor;
+    u4 typeIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    typeIdx = decInsn.vB;       // 2nd item
+    if (typeIdx >= pDvmDex->pHeader->typeIdsSize) {
+        LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
+            typeIdx, pDvmDex->pHeader->typeIdsSize);
+        return false;
+    }
+
+    /* reference types start with 'L'; anything else (arrays, primitives)
+     * is not a legal new-instance target */
+    descriptor = dexStringByTypeIdx(pDvmDex->pDexFile, typeIdx);
+    if (descriptor[0] != 'L') {
+        LOG_VFY_METH(meth, "VFY: can't call new-instance on type '%s'\n",
+            descriptor);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static checks on a "new-array" instruction. Specifically, make
+ * sure they aren't creating an array of arrays that causes the number of
+ * dimensions to exceed 255.
+ */
+static bool checkNewArray(const Method* meth, int insnIdx)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    const char* classDescriptor;
+    const char* cp;
+    int dimensions;
+    u4 typeIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    typeIdx = decInsn.vC;       // 3rd item
+    if (typeIdx >= pDvmDex->pHeader->typeIdsSize) {
+        LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
+            typeIdx, pDvmDex->pHeader->typeIdsSize);
+        return false;
+    }
+
+    classDescriptor = dexStringByTypeIdx(pDvmDex->pDexFile, typeIdx);
+
+    /* each leading '[' in the descriptor is one array dimension */
+    dimensions = 0;
+    for (cp = classDescriptor; *cp == '['; cp++)
+        dimensions++;
+
+    if (dimensions == 0) {
+        /* The given class must be an array type. */
+        LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (not an array)\n",
+            classDescriptor);
+        return false;
+    }
+    if (dimensions > 255) {
+        /* It is illegal to create an array of more than 255 dimensions. */
+        LOG_VFY_METH(meth, "VFY: can't new-array class '%s' (exceeds limit)\n",
+            classDescriptor);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static checks on an instruction that takes a class constant.
+ * Ensure that the class index is in the valid range.
+ *
+ * "useB" selects which operand holds the index (vB when true, vC when
+ * false), since different instruction formats put it in different slots.
+ */
+static bool checkTypeIndex(const Method* meth, int insnIdx, bool useB)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    u4 typeIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    typeIdx = useB ? decInsn.vB : decInsn.vC;
+    if (typeIdx >= pDvmDex->pHeader->typeIdsSize) {
+        LOG_VFY_METH(meth, "VFY: bad type index %d (max %d)\n",
+            typeIdx, pDvmDex->pHeader->typeIdsSize);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static checks on a field get or set instruction. All we do
+ * here is ensure that the field index is in the valid range.
+ *
+ * "useB" selects which operand holds the index (vB when true, vC when
+ * false), matching the instruction's format.
+ */
+static bool checkFieldIndex(const Method* meth, int insnIdx, bool useB)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    u4 fieldIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    fieldIdx = useB ? decInsn.vB : decInsn.vC;
+    if (fieldIdx >= pDvmDex->pHeader->fieldIdsSize) {
+        LOG_VFY_METH(meth,
+            "VFY: bad field index %d (max %d) at offset 0x%04x\n",
+            fieldIdx, pDvmDex->pHeader->fieldIdsSize, insnIdx);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static checks on a method invocation instruction. All we do
+ * here is ensure that the method index is in the valid range.
+ */
+static bool checkMethodIndex(const Method* meth, int insnIdx)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    u4 methodIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    methodIdx = decInsn.vB;
+    if (methodIdx >= pDvmDex->pHeader->methodIdsSize) {
+        LOG_VFY_METH(meth, "VFY: bad method index %d (max %d)\n",
+            methodIdx, pDvmDex->pHeader->methodIdsSize);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static checks on a string constant instruction. All we do
+ * here is ensure that the string index is in the valid range.
+ */
+static bool checkStringIndex(const Method* meth, int insnIdx)
+{
+    DvmDex* pDvmDex = meth->clazz->pDvmDex;
+    DecodedInstruction decInsn;
+    u4 stringIdx;
+
+    decodeInstruction(meth, insnIdx, &decInsn);
+    stringIdx = decInsn.vB;
+    if (stringIdx >= pDvmDex->pHeader->stringIdsSize) {
+        LOG_VFY_METH(meth, "VFY: bad string index %d (max %d)\n",
+            stringIdx, pDvmDex->pHeader->stringIdsSize);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Perform static verification on instructions.
+ *
+ * As a side effect, this sets the "branch target" flags in InsnFlags.
+ *
+ * "(CF)" items are handled during code-flow analysis.
+ *
+ * v3 4.10.1
+ * - target of each jump and branch instruction must be valid
+ * - targets of switch statements must be valid
+ * - (CF) operands referencing constant pool entries must be valid
+ * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
+ * - (new) verify operands of "quick" field ops
+ * - (CF) operands of method invocation instructions must be valid
+ * - (new) verify operands of "quick" method invoke ops
+ * - (CF) only invoke-direct can call a method starting with '<'
+ * - (CF) <clinit> must never be called explicitly
+ * - (CF) operands of instanceof, checkcast, new (and variants) must be valid
+ * - new-array[-type] limited to 255 dimensions
+ * - can't use "new" on an array class
+ * - (?) limit dimensions in multi-array creation
+ * - (CF) local variable load/store register values must be in valid range
+ *
+ * v3 4.11.1.2
+ * - branches must be within the bounds of the code array
+ * - targets of all control-flow instructions are the start of an instruction
+ * - (CF) register accesses fall within range of allocated registers
+ * - (N/A) access to constant pool must be of appropriate type
+ * - (CF) code does not end in the middle of an instruction
+ * - (CF) execution cannot fall off the end of the code
+ * - (earlier) for each exception handler, the "try" area must begin and
+ *   end at the start of an instruction (end can be at the end of the code)
+ * - (earlier) for each exception handler, the handler must start at a valid
+ *   instruction
+ *
+ * TODO: move some of the "CF" items in here for better performance (the
+ * code-flow analysis sometimes has to process the same instruction several
+ * times).
+ */
+static bool verifyInstructions(const Method* meth, InsnFlags* insnFlags,
+    int verifyFlags)
+{
+    const int insnCount = dvmGetMethodInsnsSize(meth);
+    const u2* insns = meth->insns;
+    int i;
+
+    /* the start of the method is a "branch target" */
+    dvmInsnSetBranchTarget(insnFlags, 0, true);
+
+    /* walk instruction-by-instruction; "width" advances both the code-unit
+     * index and the raw instruction pointer */
+    for (i = 0; i < insnCount; /**/) {
+        /* instructions with any of these flags are GC points */
+        static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
+            kInstrCanThrow | kInstrCanReturn;
+        int width = dvmInsnGetWidth(insnFlags, i);
+        OpCode opcode = *insns & 0xff;
+        InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode);
+        /* NOTE: removed unused locals "offset" and "absOffset" */
+
+        if ((opFlags & gcMask) != 0)
+            dvmInsnSetGcPoint(insnFlags, i, true);
+
+        switch (opcode) {
+        case OP_NOP:
+            /* plain no-op or switch table data; nothing to do here */
+            break;
+
+        case OP_CONST_STRING:
+        case OP_CONST_STRING_JUMBO:
+            if (!checkStringIndex(meth, i))
+                return false;
+            break;
+
+        case OP_CONST_CLASS:
+        case OP_CHECK_CAST:
+            if (!checkTypeIndex(meth, i, true))
+                return false;
+            break;
+        case OP_INSTANCE_OF:
+            if (!checkTypeIndex(meth, i, false))
+                return false;
+            break;
+
+        case OP_PACKED_SWITCH:
+        case OP_SPARSE_SWITCH:
+            /* verify the associated table */
+            if (!dvmCheckSwitchTargets(meth, insnFlags, i))
+                return false;
+            break;
+
+        case OP_FILL_ARRAY_DATA:
+            /* verify the associated table */
+            if (!checkArrayData(meth, i))
+                return false;
+            break;
+
+        case OP_GOTO:
+        case OP_GOTO_16:
+        case OP_IF_EQ:
+        case OP_IF_NE:
+        case OP_IF_LT:
+        case OP_IF_GE:
+        case OP_IF_GT:
+        case OP_IF_LE:
+        case OP_IF_EQZ:
+        case OP_IF_NEZ:
+        case OP_IF_LTZ:
+        case OP_IF_GEZ:
+        case OP_IF_GTZ:
+        case OP_IF_LEZ:
+            /* check the destination */
+            if (!dvmCheckBranchTarget(meth, insnFlags, i, false))
+                return false;
+            break;
+        case OP_GOTO_32:
+            /* check the destination; self-branch is okay */
+            if (!dvmCheckBranchTarget(meth, insnFlags, i, true))
+                return false;
+            break;
+
+        case OP_NEW_INSTANCE:
+            if (!checkNewInstance(meth, i))
+                return false;
+            break;
+
+        case OP_NEW_ARRAY:
+            if (!checkNewArray(meth, i))
+                return false;
+            break;
+
+        case OP_FILLED_NEW_ARRAY:
+        case OP_FILLED_NEW_ARRAY_RANGE:
+            if (!checkTypeIndex(meth, i, true))
+                return false;
+            break;
+
+        case OP_IGET:
+        case OP_IGET_WIDE:
+        case OP_IGET_OBJECT:
+        case OP_IGET_BOOLEAN:
+        case OP_IGET_BYTE:
+        case OP_IGET_CHAR:
+        case OP_IGET_SHORT:
+        case OP_IPUT:
+        case OP_IPUT_WIDE:
+        case OP_IPUT_OBJECT:
+        case OP_IPUT_BOOLEAN:
+        case OP_IPUT_BYTE:
+        case OP_IPUT_CHAR:
+        case OP_IPUT_SHORT:
+            /* check the field index (instance field ops keep it in vC) */
+            if (!checkFieldIndex(meth, i, false))
+                return false;
+            break;
+        case OP_SGET:
+        case OP_SGET_WIDE:
+        case OP_SGET_OBJECT:
+        case OP_SGET_BOOLEAN:
+        case OP_SGET_BYTE:
+        case OP_SGET_CHAR:
+        case OP_SGET_SHORT:
+        case OP_SPUT:
+        case OP_SPUT_WIDE:
+        case OP_SPUT_OBJECT:
+        case OP_SPUT_BOOLEAN:
+        case OP_SPUT_BYTE:
+        case OP_SPUT_CHAR:
+        case OP_SPUT_SHORT:
+            /* check the field index (static field ops keep it in vB) */
+            if (!checkFieldIndex(meth, i, true))
+                return false;
+            break;
+
+        case OP_INVOKE_VIRTUAL:
+        case OP_INVOKE_SUPER:
+        case OP_INVOKE_DIRECT:
+        case OP_INVOKE_STATIC:
+        case OP_INVOKE_INTERFACE:
+        case OP_INVOKE_VIRTUAL_RANGE:
+        case OP_INVOKE_SUPER_RANGE:
+        case OP_INVOKE_DIRECT_RANGE:
+        case OP_INVOKE_STATIC_RANGE:
+        case OP_INVOKE_INTERFACE_RANGE:
+            /* check the method index */
+            if (!checkMethodIndex(meth, i))
+                return false;
+            break;
+
+        case OP_EXECUTE_INLINE:
+        case OP_INVOKE_DIRECT_EMPTY:
+        case OP_IGET_QUICK:
+        case OP_IGET_WIDE_QUICK:
+        case OP_IGET_OBJECT_QUICK:
+        case OP_IPUT_QUICK:
+        case OP_IPUT_WIDE_QUICK:
+        case OP_IPUT_OBJECT_QUICK:
+        case OP_INVOKE_VIRTUAL_QUICK:
+        case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+        case OP_INVOKE_SUPER_QUICK:
+        case OP_INVOKE_SUPER_QUICK_RANGE:
+            /* only legal in a previously-optimized DEX */
+            if ((verifyFlags & VERIFY_ALLOW_OPT_INSTRS) == 0) {
+                LOG_VFY("VFY: not expecting optimized instructions\n");
+                return false;
+            }
+            break;
+
+        default:
+            /* nothing to do */
+            break;
+        }
+
+        assert(width > 0);
+        i += width;
+        insns += width;
+    }
+
+    /* make sure the last instruction ends at the end of the insn area */
+    if (i != insnCount) {
+        LOG_VFY_METH(meth,
+            "VFY: code did not end when expected (end at %d, count %d)\n",
+            i, insnCount);
+        return false;
+    }
+
+    return true;
+}
+
diff --git a/vm/analysis/DexVerify.h b/vm/analysis/DexVerify.h
new file mode 100644
index 0000000..9deaad9
--- /dev/null
+++ b/vm/analysis/DexVerify.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik classfile verification.
+ */
+#ifndef _DALVIK_DEXVERIFY
+#define _DALVIK_DEXVERIFY
+
+/*
+ * Global verification mode. These must be in order from least verification
+ * to most. If we're using "exact GC", we may need to perform some of
+ * the verification steps anyway.
+ */
+typedef enum {
+ VERIFY_MODE_UNKNOWN = 0,
+ VERIFY_MODE_NONE,
+ VERIFY_MODE_REMOTE,
+ VERIFY_MODE_ALL
+} DexClassVerifyMode;
+
+/*
+ * Bit values for dvmVerifyClass() "verifyFlags" arg.
+ *
+ * (Verification is currently a prerequisite for optimization, not an
+ * after-effect, so we don't currently use VERIFY_ALLOW_OPT_INSTRS.)
+ */
+enum {
+ VERIFY_DEFAULT = 0,
+ VERIFY_ALLOW_OPT_INSTRS = 1, // allow instrs emitted by optimizer
+};
+
+bool dvmVerificationStartup(void);
+void dvmVerificationShutdown(void);
+
+/*
+ * Perform verification on all classes loaded from this DEX file. This
+ * should be done before optimization.
+ */
+bool dvmVerifyAllClasses(DexFile* pDexFile);
+
+/*
+ * Verify a single class.
+ */
+bool dvmVerifyClass(ClassObject* clazz, int verifyFlags);
+
+/*
+ * Release the storage associated with a RegisterMap.
+ */
+void dvmFreeRegisterMap(RegisterMap* pMap);
+
+#endif /*_DALVIK_DEXVERIFY*/
diff --git a/vm/analysis/ReduceConstants.c b/vm/analysis/ReduceConstants.c
new file mode 100644
index 0000000..ec7ba0f
--- /dev/null
+++ b/vm/analysis/ReduceConstants.c
@@ -0,0 +1,1057 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Compress the range of "constant pool" indexes in instructions and
+ * annotations to lower runtime RAM footprint.
+ *
+ * NOTE: this is an incomplete experimental feature. Do not try to use it.
+ */
+#include "Dalvik.h"
+#include "libdex/InstrUtils.h"
+#include "libdex/OptInvocation.h"
+#include "libdex/DexClass.h"
+
+/*
+Overview
+
+When a class, method, field, or string constant is referred to from
+Dalvik bytecode, the reference takes the form of an integer index value.
+This value indexes into an array of type_id_item, method_id_item,
+field_id_item, or string_id_item in the DEX file. The first three
+themselves contain (directly or indirectly) indexes to strings that the
+resolver uses to convert the instruction stream index into a pointer to
+the appropriate object or struct.
+
+For example, an invoke-virtual instruction needs to specify which method
+is to be invoked. The method constant indexes into the method_id_item
+array, each entry of which has indexes that specify the defining class
+(type_id_item), method name (string_id_item), and method prototype
+(proto_id_item). The type_id_item just holds an index to a string_id_item,
+which holds the file offset to the string with the class name. The VM
+finds the class by name, then searches through the class' table of virtual
+methods to find one with a matching name and prototype.
+
+This process is fairly expensive, so after the first time it completes
+successfully, the VM records that the method index resolved to a specific
+Method struct. On subsequent execution, the VM just pulls the Method ptr
+out of the resolved-methods array. A similar approach is used with
+the indexes for classes, fields, and string constants.
+
+The problem with this approach is that we need to have a "resolved" entry
+for every possible class, method, field, and string constant in every
+DEX file, even if some of those aren't used from code. The DEX string
+constant table has entries for method prototypes and class names that are
+never used by the code, and "public static final" fields often turn into
+immediate constants. The resolution table entries are only 4 bytes each,
+but there are roughly 200,000 of them in the bootstrap classes alone.
+
+DEX optimization removes many index references by replacing virtual method
+indexes with vtable offsets and instance field indexes with byte offsets.
+In the earlier example, the method would be resolved at "dexopt" time, and
+the instruction rewritten as invoke-virtual-quick with the vtable offset.
+
+(There are comparatively few classes compared to other constant pool
+entries, and a much higher percentage (typically 60-70%) are used. The
+biggest gains come from the string pool.)
+
+Using the resolved-entity tables provides a substantial performance
+improvement, but results in applications allocating 1MB+ of tables that
+are 70% unused. The used and unused entries are freely intermixed,
+preventing effective sharing with the zygote process, and resulting in
+large numbers of private/dirty pages on the native heap as the tables
+populate on first use.
+
+The trick is to reduce the memory usage without decreasing performance.
+Using smaller resolved-entity tables can actually give us a speed boost,
+because we'll have a smaller "live" set of pages and make more effective
+use of the data cache.
+
+
+The approach we're going to use is to determine the set of indexes that
+could potentially be resolved, generate a mapping from the minimal set to
+the full set, and append the mapping to the DEX file. This is done at
+"dexopt" time, because we need to keep the changes in shared/read-only
+pages or we'll lose the benefits of doing the work.
+
+There are two ways to create and use the new mapping:
+
+ (1) Write the entire full->minimal mapping to the ".odex" file. On every
+ instruction that uses an index, use the mapping to determine the
+ "compressed" constant value, and then use that to index into the
+ resolved-entity tables on the heap. The instruction stream is unchanged,
+ and the resolver can easily tell if a given index is cacheable.
+
+ (2) Write the inverse minimal->full mapping to the ".odex" file, and
+ rewrite the constants in the instruction stream. The interpreter is
+ unchanged, and the resolver code uses the mapping to find the original
+ data in the DEX.
+
+Approach #1 is easier and safer to implement, but it requires a table
+lookup every time we execute an instruction that includes a constant
+pool reference. This causes an unacceptable performance hit, chiefly
+because we're hitting semi-random memory pages and hosing the data cache.
+This is mitigated somewhat by DEX optimizations that replace the constant
+with a vtable index or field byte offset. Approach #1 also requires
+a larger map table, increasing the size of the DEX on disk. One nice
+property of approach #1 is that most of the DEX file is unmodified,
+so use of the mapping is a runtime decision.
+
+Approach #2 is preferred for performance reasons.
+
+
+The class/method/field/string resolver code has to handle indices from
+three sources: interpreted instructions, annotations, and exception
+"catch" lists. Sometimes these occur indirectly, e.g. we need to resolve
+the declaring class associated with fields and methods when the latter
+two are themselves resolved. Parsing and rewriting instructions is fairly
+straightforward, but annotations use a complex format with variable-width
+index values.
+
+We can safely rewrite index values in annotations if we guarantee that the
+new value is smaller than the original. This implies a two-pass approach:
+the first determines the set of indexes actually used, the second does the
+rewrite. Doing the rewrite in a single pass would be much harder.
+
+Instances of the "original" indices will still be found in the file; if
+we try to be all-inclusive we will include some stuff that doesn't need
+to be there (e.g. we don't generally need to cache the class name string
+index result, since once we have the class resolved we don't need to look
+it up by name through the resolver again). There is some potential for
+performance improvement by caching more than we strictly need, but we can
+afford to give up a little performance during class loading if it allows
+us to regain some memory.
+
+For safety and debugging, it's useful to distinguish the "compressed"
+constants in some way, e.g. setting the high bit when we rewrite them.
+In practice we don't have any free bits: indexes are usually 16-bit
+values, and we have more than 32,767 string constants in at least one of
+our core DEX files. Also, this does not work with constants embedded in
+annotations, because of the variable-width encoding.
+
+We should be safe if we can establish a clear distinction between sources
+of "original" and "compressed" indices. If the values get crossed up we
+can end up with elusive bugs. The easiest approach is to declare that
+only indices pulled from certain locations (the instruction stream and/or
+annotations) are compressed. This prevents us from adding indices in
+arbitrary locations to the compressed set, but should allow a reasonably
+robust implementation.
+
+
+Further implementation thoughts:
+
+ - We don't have to do annotations in the first pass. At heart the
+ resolved entity cache is a performance optimization, not necessary for
+ correctness, and we're not making annotation performance a priority
+ at this stage.
+ - The most important "fast path" is instruction processing. Everything
+ else can do additional work without having a measurable impact.
+ However...
+ - We need to keep an eye on uncached resolves to ensure that we haven't
+ introduced noticeable performance losses. In particular, the use of
+ runtime annotations with string constants may suffer if we don't include
+ annotation rewriting in the solution.
+ - We can have separate resolver functions for "original" and "compressed"
+ indices. This way we don't have to add a flag argument to the resolver
+ functions (which would require passing an additional parameter in from
+ the interpreter).
+ - The VM spec has some specific things to say about string constant
+ equality and interning. Index compression should have no effect on
+ that; we just change how long it takes to find the interned string in
+ certain circumstances. The impact can be mitigated somewhat by
+ improving the performance of the interned string table code.
+ - This can make e.g. method resolution slower. The method_id_item has
+ an index to a method name string, and we will no longer cache the
+ result of resolving that string. This impacts resolution of any method
+ with the same name as a previously-resolved method.
+ - We may need to tweak the tools, particularly "dexdump", to show the
+ translated values.
+ - We can use 16-bit values in the mapping table, since we should have
+ fewer than 2^16 remapped entries. If we overflow we can skip the remap
+ for that table or for the entire DEX file. The resolver will need to
+ check for the existence of the table to determine whether or not entries
+ must be remapped. The cost of the extra check is acceptable for
+ approach #2, since it's only at resolve time, but may be undesirable
+ for approach #1.
+*/
+/*
+Output Formats
+
+There are two possible output formats, from which we choose based on how
+we plan to take advantage of the remapped constants. At most one of these
+will appear in the DEX.
+
+NOTE: if EIXM appears in the DEX, the VM *must* be configured with
+DVM_RESOLVER_CACHE=DVM_RC_EXPANDING (2). Otherwise the constants we
+pull from the instruction stream will be wrong and we will fail quickly.
+
+For approach #1: map from original indices to the reduced set.
+
+ This includes the four "mapToNew" tables.
+
+ Format (RIXM):
+ u4 classCount // #of entries in classMap[]; == typeIdsSize
+ u4 reducedClassCount // #of entries in remapped table (for alloc)
+ u2 classMap[]
+ u4 methodCount
+ u4 reducedMethodCount
+ u2 methodMap[]
+ u4 fieldCount
+ u4 reducedFieldCount
+ u2 fieldMap[]
+ u4 stringCount
+ u4 reducedStringCount
+ u2 stringMap[]
+
+For approach #2: map from the reduced set back to the originals.
+
+ This includes the four "mapToOld" tables.
+
+ Format (EIXM):
+ u4 classCount // #of entries in classMap[]; post-reduction
+ u2 classMap[]
+ u4 methodCount
+ u2 methodMap[]
+ u4 fieldCount
+ u2 fieldMap[]
+ u4 stringCount
+ u2 stringMap[]
+
+The arrays are padded so that the "count" values are always aligned on
+32-bit boundaries. All multi-byte values are in native host order.
+*/
+
+
+/*
+ * Gather results from the post-optimization instruction scan.
+ */
+typedef struct ScanResults {
+ /* output */
+ BitVector* usedClasses;
+ BitVector* usedMethods;
+ BitVector* usedFields;
+ BitVector* usedStrings;
+} ScanResults;
+
+/* prototype for the for-all-methods function */
+typedef void (AllMethodsFunc)(DexFile* pDexFile, const char* classDescriptor,
+ DexMethod* pDexMethod, void* arg);
+
+
+/*
+ * Release a ScanResults struct and the four bit vectors it owns.
+ *
+ * Accepts NULL (no-op), so it is safe to call on a failed or partial
+ * allocation; dvmFreeBitVector() likewise tolerates NULL members.
+ */
+static void freeScanResults(ScanResults* pResults)
+{
+    if (pResults != NULL) {
+        dvmFreeBitVector(pResults->usedStrings);
+        dvmFreeBitVector(pResults->usedFields);
+        dvmFreeBitVector(pResults->usedMethods);
+        dvmFreeBitVector(pResults->usedClasses);
+        free(pResults);
+    }
+}
+
+/*
+ * Create a ScanResults struct with one bit vector per constant pool
+ * section, each sized from the DEX header and initially clear.
+ *
+ * Returns NULL on allocation failure; any partial allocation is
+ * released before returning.
+ */
+static ScanResults* allocScanResults(const DexFile* pDexFile)
+{
+    const DexHeader* pHeader = pDexFile->pHeader;
+    ScanResults* pResults = (ScanResults*) calloc(1, sizeof(ScanResults));
+
+    if (pResults == NULL)
+        return NULL;
+
+    pResults->usedClasses = dvmAllocBitVector(pHeader->typeIdsSize, false);
+    pResults->usedMethods = dvmAllocBitVector(pHeader->methodIdsSize, false);
+    pResults->usedFields = dvmAllocBitVector(pHeader->fieldIdsSize, false);
+    pResults->usedStrings = dvmAllocBitVector(pHeader->stringIdsSize, false);
+
+    bool allocFailed = pResults->usedClasses == NULL
+        || pResults->usedMethods == NULL
+        || pResults->usedFields == NULL
+        || pResults->usedStrings == NULL;
+
+    if (allocFailed) {
+        freeScanResults(pResults);
+        return NULL;
+    }
+
+    return pResults;
+}
+
+/*
+ * Call "func(method, arg)" on all methods in the specified class.
+ *
+ * Pass in a pointer to the class_data_item, positioned at the start of
+ * the field data (i.e. just past the class data header).  The stream
+ * pointer (*ppEncodedData) is advanced past everything that is decoded,
+ * so on return it points just past this class' data.
+ *
+ * "classDescriptor" is for debug messages.
+ */
+static void forAllMethodsInClass(DexFile* pDexFile, const u1** ppEncodedData,
+    const DexClassDataHeader* pHeader, const char* classDescriptor,
+    AllMethodsFunc func, void* arg)
+{
+    int i;
+
+    /*
+     * Consume field data.  The fields themselves aren't used here; they
+     * must be decoded anyway because the class_data_item is a packed,
+     * variable-width stream and the method data follows the fields.
+     * Note the delta-encoded index ("lastIndex") restarts at zero for
+     * each list.
+     */
+    if (pHeader->staticFieldsSize != 0) {
+        int count = (int) pHeader->staticFieldsSize;
+        u4 lastIndex = 0;
+        DexField field;
+        for (i = 0; i < count; i++) {
+            dexReadClassDataField(ppEncodedData, &field, &lastIndex);
+        }
+    }
+    if (pHeader->instanceFieldsSize != 0) {
+        int count = (int) pHeader->instanceFieldsSize;
+        u4 lastIndex = 0;
+        DexField field;
+        for (i = 0; i < count; i++) {
+            dexReadClassDataField(ppEncodedData, &field, &lastIndex);
+        }
+    }
+
+    /*
+     * Run through all methods, invoking the callback on each.
+     */
+    if (pHeader->directMethodsSize != 0) {
+        int count = (int) pHeader->directMethodsSize;
+        u4 lastIndex = 0;
+        DexMethod method;
+
+        for (i = 0; i < count; i++) {
+            dexReadClassDataMethod(ppEncodedData, &method, &lastIndex);
+            (func)(pDexFile, classDescriptor, &method, arg);
+        }
+    }
+    if (pHeader->virtualMethodsSize != 0) {
+        int count = (int) pHeader->virtualMethodsSize;
+        u4 lastIndex = 0;
+        DexMethod method;
+
+        for (i = 0; i < count; i++) {
+            dexReadClassDataMethod(ppEncodedData, &method, &lastIndex);
+            (func)(pDexFile, classDescriptor, &method, arg);
+        }
+    }
+}
+
+/*
+ * Invoke "func(method, arg)" on every method (direct and virtual) of
+ * every class defined in the DEX file.
+ */
+static void forAllMethods(DexFile* pDexFile, AllMethodsFunc func, void* arg)
+{
+    u4 classCount = pDexFile->pHeader->classDefsSize;
+    u4 i;
+
+    for (i = 0; i < classCount; i++) {
+        const DexClassDef* pClassDef = dexGetClassDef(pDexFile, i);
+        const u1* pEncodedData = dexGetClassData(pDexFile, pClassDef);
+        const char* classDescriptor =
+            dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+        if (pEncodedData == NULL) {
+            /* no class data, e.g. "marker interface" */
+            continue;
+        }
+
+        DexClassDataHeader header;
+        dexReadClassDataHeader(&pEncodedData, &header);
+
+        forAllMethodsInClass(pDexFile, &pEncodedData, &header,
+            classDescriptor, func, arg);
+    }
+}
+
+/*
+ * Record that a class (type) index appears in the instruction stream.
+ */
+static void markClass(const u2* ptr, ScanResults* pResults)
+{
+    u2 idx = *ptr;
+
+    if (!dvmSetBit(pResults->usedClasses, idx))
+        LOGE("Unable to mark class %d as in-use\n", idx);
+}
+
+/*
+ * Record that a method index appears in the instruction stream.
+ */
+static void markMethod(const u2* ptr, ScanResults* pResults)
+{
+    u2 idx = *ptr;
+
+    if (!dvmSetBit(pResults->usedMethods, idx))
+        LOGE("Unable to mark method %d as in-use\n", idx);
+}
+
+/*
+ * Record that a field index appears in the instruction stream.
+ */
+static void markField(const u2* ptr, ScanResults* pResults)
+{
+    u2 idx = *ptr;
+
+    if (!dvmSetBit(pResults->usedFields, idx))
+        LOGE("Unable to mark field %d as in-use\n", idx);
+}
+
+/*
+ * Record that a (16-bit) string constant appears in the instruction stream.
+ */
+static void markString(const u2* ptr, ScanResults* pResults)
+{
+    u2 idx = *ptr;
+
+    if (!dvmSetBit(pResults->usedStrings, idx))
+        LOGE("Unable to mark string %d as in-use\n", idx);
+}
+
+/*
+ * Record that a "jumbo" (32-bit) string constant appears in the
+ * instruction stream.
+ *
+ * The index is stored in native byte order but is not guaranteed to be
+ * 32-bit aligned, so copy it out with memcpy before use.
+ */
+static void markJumboString(const u2* ptr, ScanResults* pResults)
+{
+    u4 stringIdx;
+
+    /* it's in native byte order, but might not be 32-bit aligned */
+    memcpy(&stringIdx, ptr, sizeof(u4));
+    if (!dvmSetBit(pResults->usedStrings, stringIdx)) {
+        /* "%u": stringIdx is an unsigned 32-bit value, not an int */
+        LOGE("Unable to mark string %u as in-use\n", stringIdx);
+    }
+}
+
+/*
+ * Remap a 16-bit constant in the instruction stream from its original
+ * index to the reduced ("compressed") index.
+ *
+ * If no map was generated for this category -- constructIndexMap()
+ * skips a map whose entry count can't be represented, leaving its
+ * pointers NULL in the calloc'd IndexMapSet -- the value is left alone.
+ */
+static inline void updateValue(u2* ptr, const IndexMapSet* pIndexMapSet,
+    int whichMap)
+{
+    const IndexMap* pMap = &pIndexMapSet->map[whichMap];
+
+    /*
+     * BUG FIX: the old test ("pMap != NULL") was always true, because the
+     * address of an array element is never NULL.  The meaningful check is
+     * whether this map's translation table was actually constructed.
+     */
+    if (pMap->mapToNew != NULL) {
+        u2 newIdx = pMap->mapToNew[*ptr];
+        assert(newIdx != kNoIndexMapping);
+        *ptr = newIdx;
+    }
+}
+static void updateClass(u2* ptr, const IndexMapSet* pIndexMapSet)
+{
+    updateValue(ptr, pIndexMapSet, kMapClasses);
+}
+static void updateMethod(u2* ptr, const IndexMapSet* pIndexMapSet)
+{
+    updateValue(ptr, pIndexMapSet, kMapMethods);
+}
+static void updateField(u2* ptr, const IndexMapSet* pIndexMapSet)
+{
+    updateValue(ptr, pIndexMapSet, kMapFields);
+}
+static void updateString(u2* ptr, const IndexMapSet* pIndexMapSet)
+{
+    updateValue(ptr, pIndexMapSet, kMapStrings);
+}
+/*
+ * Remap a "jumbo" (32-bit) string constant in the instruction stream.
+ *
+ * BUG FIX: the new index must be looked up with the full 32-bit value
+ * copied out of the instruction stream ("stringIdx"), not with "*ptr",
+ * which yields only the low 16 bits of the index.
+ */
+static void updateJumboString(u2* ptr, const IndexMapSet* pIndexMapSet)
+{
+    const IndexMap* pMap = &pIndexMapSet->map[kMapStrings];
+    u4 stringIdx;
+    u4 newIdx;
+
+    /* no string map was constructed; leave the constant untouched */
+    if (pMap->mapToNew == NULL)
+        return;
+
+    /* it's in native byte order, but might not be 32-bit aligned */
+    memcpy(&stringIdx, ptr, sizeof(stringIdx));
+
+    /* get new value, using the full 32-bit index */
+    newIdx = pMap->mapToNew[stringIdx];
+    assert(newIdx != kNoIndexMapping);
+
+    /* copy it out */
+    memcpy(ptr, &newIdx, sizeof(newIdx));
+}
+
+/*
+ * Run through an instruction stream, marking constants as we see them.
+ *
+ * If "pResults" is non-NULL, we populate "pResults" with what we find,
+ * making no changes to the instruction stream.
+ *
+ * If "pIndexMapSet" is non-NULL, we rewrite the constants in the
+ * instruction stream.  (Exactly one of the two is expected to be
+ * non-NULL per call -- see markUsedConstants / updateUsedConstants.)
+ */
+static void markUsedConstantsFromInsns(u2* insns, u4 insnsSize,
+    ScanResults* pResults, const IndexMapSet* pIndexMapSet)
+{
+    //printf("  %p %u units\n", insns, insnsSize);
+
+    while (insnsSize > 0) {
+        int width;
+        /* for every format handled below, the constant sits in the
+           second code unit of the instruction */
+        u2* pConst = insns + 1;
+
+        /* the low byte of the first code unit is the opcode */
+        switch (*insns & 0xff) {
+        case OP_IGET:
+        case OP_IGET_WIDE:
+        case OP_IGET_OBJECT:
+        case OP_IGET_BOOLEAN:
+        case OP_IGET_BYTE:
+        case OP_IGET_CHAR:
+        case OP_IGET_SHORT:
+        case OP_IPUT:
+        case OP_IPUT_WIDE:
+        case OP_IPUT_OBJECT:
+        case OP_IPUT_BOOLEAN:
+        case OP_IPUT_BYTE:
+        case OP_IPUT_CHAR:
+        case OP_IPUT_SHORT:
+        case OP_SGET:
+        case OP_SGET_WIDE:
+        case OP_SGET_OBJECT:
+        case OP_SGET_BOOLEAN:
+        case OP_SGET_BYTE:
+        case OP_SGET_CHAR:
+        case OP_SGET_SHORT:
+        case OP_SPUT:
+        case OP_SPUT_WIDE:
+        case OP_SPUT_OBJECT:
+        case OP_SPUT_BOOLEAN:
+        case OP_SPUT_BYTE:
+        case OP_SPUT_CHAR:
+        case OP_SPUT_SHORT:
+            /* instanceop vA, vB, field@CCCC */
+            /* staticop vAA, field@BBBB */
+            if (pResults != NULL)
+                markField(pConst, pResults);
+            else
+                updateField(pConst, pIndexMapSet);
+            break;
+
+        case OP_CONST_STRING:
+            /* const-string vAA, string@BBBB */
+            if (pResults != NULL)
+                markString(pConst, pResults);
+            else
+                updateString(pConst, pIndexMapSet);
+            break;
+
+        case OP_CONST_STRING_JUMBO:
+            /* const-string/jumbo vAA, string@BBBBBBBB (32-bit index) */
+            if (pResults != NULL)
+                markJumboString(pConst, pResults);
+            else
+                updateJumboString(pConst, pIndexMapSet);
+            break;
+
+        case OP_CONST_CLASS:
+        case OP_CHECK_CAST:
+        case OP_NEW_INSTANCE:
+        case OP_FILLED_NEW_ARRAY_RANGE:
+        case OP_INSTANCE_OF:
+        case OP_NEW_ARRAY:
+        case OP_FILLED_NEW_ARRAY:
+            /* const-class vAA, type@BBBB */
+            /* check-cast vAA, type@BBBB */
+            /* new-instance vAA, type@BBBB */
+            /* filled-new-array/range {vCCCC .. vNNNN}, type@BBBB */
+            /* instance-of vA, vB, type@CCCC */
+            /* new-array vA, vB, type@CCCC */
+            /* filled-new-array {vD, vE, vF, vG, vA}, type@CCCC */
+            if (pResults != NULL)
+                markClass(pConst, pResults);
+            else
+                updateClass(pConst, pIndexMapSet);
+            break;
+
+        case OP_INVOKE_VIRTUAL:
+        case OP_INVOKE_SUPER:
+        case OP_INVOKE_DIRECT:
+        case OP_INVOKE_STATIC:
+        case OP_INVOKE_INTERFACE:
+        case OP_INVOKE_VIRTUAL_RANGE:
+        case OP_INVOKE_SUPER_RANGE:
+        case OP_INVOKE_DIRECT_RANGE:
+        case OP_INVOKE_STATIC_RANGE:
+        case OP_INVOKE_INTERFACE_RANGE:
+            /* invoke-kind {vD, vE, vF, vG, vA}, meth@CCCC */
+            /* invoke-kind/range {vCCCC .. vNNNN}, meth@BBBB */
+            if (pResults != NULL)
+                markMethod(pConst, pResults);
+            else
+                updateMethod(pConst, pIndexMapSet);
+            break;
+
+        default:
+            // no constant pool reference in this instruction; skip it
+            ;
+        }
+
+        /* advance by the instruction's width; this helper also handles
+           embedded data tables (packed-switch, fill-array-data, etc.) */
+        width = dexGetInstrOrTableWidthAbs(gDvm.instrWidth, insns);
+        assert(width > 0 && width <= (int)insnsSize);
+
+        insns += width;
+        insnsSize -= width;
+    }
+}
+
+/*
+ * AllMethodsFunc implementation.
+ *
+ * Scan one method's bytecode and set bits in the ScanResults (passed
+ * in via "arg") for every constant pool reference found.
+ */
+static void markUsedConstants(DexFile* pDexFile, const char* classDescriptor,
+    DexMethod* pDexMethod, void* arg)
+{
+    ScanResults* pResults = (ScanResults*) arg;
+    const DexCode* pDexCode = dexGetCode(pDexFile, pDexMethod);
+
+    /* abstract/native methods have no code block */
+    if (pDexCode == NULL)
+        return;
+
+    markUsedConstantsFromInsns((u2*) pDexCode->insns, pDexCode->insnsSize,
+        pResults, NULL);
+}
+
+/*
+ * AllMethodsFunc implementation.
+ *
+ * Rewrite the constant pool references in one method's bytecode, using
+ * the IndexMapSet passed in via "arg".
+ */
+static void updateUsedConstants(DexFile* pDexFile, const char* classDescriptor,
+    DexMethod* pDexMethod, void* arg)
+{
+    const IndexMapSet* pIndexMapSet = (const IndexMapSet*) arg;
+    const DexCode* pDexCode = dexGetCode(pDexFile, pDexMethod);
+
+    /* abstract/native methods have no code block */
+    if (pDexCode == NULL)
+        return;
+
+    markUsedConstantsFromInsns((u2*) pDexCode->insns, pDexCode->insnsSize,
+        NULL, pIndexMapSet);
+}
+
+/*
+ * Report how many entries of one constant pool section are in use,
+ * along with the percentage that are not.
+ */
+static void showBitCount(const char* label, int setCount, int maxCount)
+{
+    float unusedPct = ((maxCount - setCount) * 100.0f) / maxCount;
+
+    printf("%s: %d of %d (%.1f%% unused)\n", label, setCount, maxCount,
+        unusedPct);
+}
+
+/*
+ * Print a per-section and total summary of constant pool usage.
+ *
+ * (Earlier revisions carried several "#if 0" debug dumps here that
+ * listed every used/unused class/method/field/string by name; they were
+ * permanently compiled out and have been removed, along with the loop
+ * variable only they referenced.)
+ */
+static void summarizeResults(DvmDex* pDvmDex, ScanResults* pResults)
+{
+    const DexHeader* pHeader = pDvmDex->pDexFile->pHeader;
+    int totalMax = 0;
+    int totalSet = 0;
+    int setCount;
+
+    setCount = dvmCountSetBits(pResults->usedClasses);
+    showBitCount("classes", setCount, pHeader->typeIdsSize);
+    totalSet += setCount;
+    totalMax += pHeader->typeIdsSize;
+
+    setCount = dvmCountSetBits(pResults->usedMethods);
+    showBitCount("methods", setCount, pHeader->methodIdsSize);
+    totalSet += setCount;
+    totalMax += pHeader->methodIdsSize;
+
+    setCount = dvmCountSetBits(pResults->usedFields);
+    showBitCount("fields", setCount, pHeader->fieldIdsSize);
+    totalSet += setCount;
+    totalMax += pHeader->fieldIdsSize;
+
+    setCount = dvmCountSetBits(pResults->usedStrings);
+    showBitCount("strings", setCount, pHeader->stringIdsSize);
+    totalSet += setCount;
+    totalMax += pHeader->stringIdsSize;
+
+    /* the "K" figure assumes a 4-byte table entry per unused index:
+       bytes/1024 == count*4/1024 == count/256 */
+    printf("TOTAL %d of %d (%.1f%% unused -- %.1fK)\n", totalSet, totalMax,
+        ((totalMax - totalSet) * 100.0f) / totalMax,
+        (totalMax - totalSet) / 256.0f);
+}
+
+/*
+ * Fill out an index map set entry.
+ *
+ * If the reduced count can't be represented in our base type (u2, with
+ * kNoIndexMapping reserved), we simply don't create the map; the entry's
+ * pointers stay NULL and callers skip it.
+ *
+ * Returns "false" only on memory allocation failure.
+ */
+static bool constructIndexMap(int totalCount, const BitVector* pBits,
+    IndexMap* pMap)
+{
+    const int kMaxIndex = 65534;    // 65535, a/k/a -1, is special
+    int setCount;
+
+    setCount = dvmCountSetBits(pBits);
+    if (setCount < 0 || setCount > kMaxIndex)
+        return true;                // can't represent; skip this map
+
+    u2* mapToOld = (u2*) malloc(setCount * sizeof(u2));
+    u2* mapToNew = (u2*) malloc(totalCount * sizeof(u2));
+
+    /*
+     * BUG FIX: malloc(0) may legitimately return NULL, so a NULL result
+     * only indicates failure when we actually requested memory.  The old
+     * unconditional NULL test could spuriously fail for an empty section.
+     */
+    if ((mapToOld == NULL && setCount != 0) ||
+        (mapToNew == NULL && totalCount != 0))
+    {
+        free(mapToOld);
+        free(mapToNew);
+        return false;
+    }
+
+    /* fill in both arrays: dense "new->old" and sparse "old->new" */
+    int entry, idx = 0;
+    for (entry = 0; entry < totalCount; entry++) {
+        if (dvmIsBitSet(pBits, entry)) {
+            mapToNew[entry] = idx;
+            mapToOld[idx] = entry;
+            idx++;
+        } else {
+            mapToNew[entry] = kNoIndexMapping;
+        }
+    }
+
+    /* sanity check: every set bit must have been assigned a new index */
+    if (idx != setCount) {
+        LOGE("GLITCH: idx=%d setCount=%d\n", idx, setCount);
+        dvmAbort();
+    }
+
+    /* success */
+    pMap->mapToOld = mapToOld;
+    pMap->mapToNew = mapToNew;
+    pMap->origCount = totalCount;
+    pMap->newCount = setCount;
+
+    return true;
+}
+
+/*
+ * Construct a "reducing" chunk, with maps that convert the constants in
+ * instructions to their reduced value for the cache lookup.
+ *
+ * Per-map layout (RIXM format): u4 origCount, u4 newCount,
+ * u2 mapToNew[origCount], padded to a 32-bit boundary.
+ */
+static bool constructReducingDataChunk(IndexMapSet* pIndexMapSet)
+{
+    int chunkLen = 0;
+    int i;
+
+    pIndexMapSet->chunkType = kDexChunkReducingIndexMap;
+
+    /*
+     * Compute space requirements and allocate storage.
+     */
+    for (i = 0; i < kNumIndexMaps; i++) {
+        /* space for the "original" count */
+        chunkLen += sizeof(u4);
+
+        /* space for the "reduced" count */
+        chunkLen += sizeof(u4);
+
+        /* add data length, round up to 32-bit boundary */
+        chunkLen += pIndexMapSet->map[i].origCount * sizeof(u2);
+        chunkLen = (chunkLen + 3) & ~3;
+    }
+
+    pIndexMapSet->chunkDataLen = chunkLen;
+    pIndexMapSet->chunkData = (u1*) calloc(1, chunkLen);
+    if (pIndexMapSet->chunkData == NULL)
+        return false;
+
+    /*
+     * Copy the data in.
+     */
+    u1* ptr = pIndexMapSet->chunkData;
+    for (i = 0; i < kNumIndexMaps; i++) {
+        u4* wordPtr = (u4*) ptr;
+        int dataLen = pIndexMapSet->map[i].origCount * sizeof(u2);
+
+        *wordPtr++ = pIndexMapSet->map[i].origCount;
+        *wordPtr++ = pIndexMapSet->map[i].newCount;
+        if (dataLen != 0)
+            memcpy(wordPtr, pIndexMapSet->map[i].mapToNew, dataLen);
+
+        /*
+         * Advance pointer, maintaining 32-bit alignment.
+         * BUG FIX: round up via uintptr_t; the old "(int) ptr" cast
+         * truncates the pointer on LP64 targets.
+         */
+        ptr = ((u1*) wordPtr) + dataLen;
+        ptr = (u1*) (((uintptr_t) ptr + 3) & ~(uintptr_t) 3);
+    }
+
+    if (ptr - (u1*) pIndexMapSet->chunkData != chunkLen) {
+        /* cast: pointer difference is ptrdiff_t, not int */
+        LOGE("GLITCH: expected len=%d, actual=%d\n",
+            chunkLen, (int) (ptr - (u1*) pIndexMapSet->chunkData));
+        dvmAbort();
+    }
+
+    return true;
+}
+
+/*
+ * Construct an "expanding" chunk, with maps that convert instructions
+ * with reduced constants back to their full original values.
+ *
+ * Per-map layout (EIXM format): u4 newCount, u2 mapToOld[newCount],
+ * padded to a 32-bit boundary.
+ */
+static bool constructExpandingDataChunk(IndexMapSet* pIndexMapSet)
+{
+    int chunkLen = 0;
+    int i;
+
+    pIndexMapSet->chunkType = kDexChunkExpandingIndexMap;
+
+    /*
+     * Compute space requirements and allocate storage.
+     */
+    for (i = 0; i < kNumIndexMaps; i++) {
+        /* space for the length word */
+        chunkLen += sizeof(u4);
+
+        /* add data length, round up to 32-bit boundary */
+        chunkLen += pIndexMapSet->map[i].newCount * sizeof(u2);
+        chunkLen = (chunkLen + 3) & ~3;
+    }
+
+    pIndexMapSet->chunkDataLen = chunkLen;
+    pIndexMapSet->chunkData = (u1*) calloc(1, chunkLen);
+    if (pIndexMapSet->chunkData == NULL)
+        return false;
+
+    /*
+     * Copy the data in.
+     */
+    u1* ptr = pIndexMapSet->chunkData;
+    for (i = 0; i < kNumIndexMaps; i++) {
+        u4* wordPtr = (u4*) ptr;
+        int dataLen = pIndexMapSet->map[i].newCount * sizeof(u2);
+
+        *wordPtr++ = pIndexMapSet->map[i].newCount;
+        if (dataLen != 0)
+            memcpy(wordPtr, pIndexMapSet->map[i].mapToOld, dataLen);
+
+        /*
+         * Advance pointer, maintaining 32-bit alignment.
+         * BUG FIX: round up via uintptr_t; the old "(int) ptr" cast
+         * truncates the pointer on LP64 targets.
+         */
+        ptr = ((u1*) wordPtr) + dataLen;
+        ptr = (u1*) (((uintptr_t) ptr + 3) & ~(uintptr_t) 3);
+    }
+
+    if (ptr - (u1*) pIndexMapSet->chunkData != chunkLen) {
+        /* cast: pointer difference is ptrdiff_t, not int */
+        LOGE("GLITCH: expected len=%d, actual=%d\n",
+            chunkLen, (int) (ptr - (u1*) pIndexMapSet->chunkData));
+        dvmAbort();
+    }
+
+    return true;
+}
+
+/*
+ * Construct the "chunk" of data that will be appended to the optimized
+ * DEX file.
+ *
+ * The flavor (reducing vs. expanding) is a compile-time decision based
+ * on DVM_RESOLVER_CACHE; at most one kind of chunk is ever produced.
+ */
+static bool constructDataChunk(IndexMapSet* pIndexMapSet)
+{
+    /* the serializers below memcpy the maps as u2 arrays */
+    assert(sizeof(pIndexMapSet->map[0].mapToOld[0]) == sizeof(u2));
+    assert(sizeof(pIndexMapSet->map[0].mapToNew[0]) == sizeof(u2));
+
+#if DVM_RESOLVER_CACHE == DVM_RC_EXPANDING
+    return constructExpandingDataChunk(pIndexMapSet);
+#else
+    return constructReducingDataChunk(pIndexMapSet);
+#endif
+}
+
+/*
+ * Allocate an IndexMapSet and construct the four maps from the scan
+ * results, then serialize them into the output chunk.
+ *
+ * Returns NULL on allocation failure.
+ */
+static IndexMapSet* createIndexMapSet(const DexFile* pDexFile,
+    ScanResults* pResults)
+{
+    IndexMapSet* pIndexMapSet;
+    bool okay = true;
+
+    pIndexMapSet = calloc(1, sizeof(*pIndexMapSet));
+    if (pIndexMapSet == NULL)
+        return NULL;
+
+    okay = okay && constructIndexMap(pDexFile->pHeader->typeIdsSize,
+                pResults->usedClasses, &pIndexMapSet->map[kMapClasses]);
+    okay = okay && constructIndexMap(pDexFile->pHeader->methodIdsSize,
+                pResults->usedMethods, &pIndexMapSet->map[kMapMethods]);
+    okay = okay && constructIndexMap(pDexFile->pHeader->fieldIdsSize,
+                pResults->usedFields, &pIndexMapSet->map[kMapFields]);
+    okay = okay && constructIndexMap(pDexFile->pHeader->stringIdsSize,
+                pResults->usedStrings, &pIndexMapSet->map[kMapStrings]);
+
+    /*
+     * Debug output.  BUG FIX: guard the dereferences -- a map may be
+     * empty, or may not have been constructed at all (mapToOld == NULL
+     * when constructIndexMap skipped it), in which case reading
+     * mapToOld[0] would crash or read garbage.
+     */
+    if (okay &&
+        pIndexMapSet->map[kMapClasses].newCount > 0 &&
+        pIndexMapSet->map[kMapMethods].newCount > 0 &&
+        pIndexMapSet->map[kMapFields].newCount > 0 &&
+        pIndexMapSet->map[kMapStrings].newCount > 0)
+    {
+        LOGVV("Constr: %d %d %d %d\n",
+            pIndexMapSet->map[kMapClasses].mapToOld[0],
+            pIndexMapSet->map[kMapMethods].mapToOld[0],
+            pIndexMapSet->map[kMapFields].mapToOld[0],
+            pIndexMapSet->map[kMapStrings].mapToOld[0]);
+    }
+
+    okay = okay && constructDataChunk(pIndexMapSet);
+
+    if (!okay) {
+        dvmFreeIndexMapSet(pIndexMapSet);
+        return NULL;
+    }
+
+    return pIndexMapSet;
+}
+
+/*
+ * Free map storage.
+ *
+ * "pIndexMapSet" may be incomplete (e.g. a partially-failed
+ * construction): NULL map pointers, a NULL chunk, and a NULL argument
+ * are all tolerated.
+ */
+void dvmFreeIndexMapSet(IndexMapSet* pIndexMapSet)
+{
+    if (pIndexMapSet == NULL)
+        return;
+
+    int i;
+    for (i = 0; i < kNumIndexMaps; i++) {
+        IndexMap* pMap = &pIndexMapSet->map[i];
+        free(pMap->mapToOld);
+        free(pMap->mapToNew);
+    }
+    free(pIndexMapSet->chunkData);
+    free(pIndexMapSet);
+}
+
+/*
+ * Rewrite constant indexes to reduce heap requirements.
+ *
+ * Returns the index map set to be appended to the optimized DEX file,
+ * or NULL if nothing was done (feature not configured, or allocation
+ * failure).
+ */
+IndexMapSet* dvmRewriteConstants(DvmDex* pDvmDex)
+{
+#if (DVM_RESOLVER_CACHE != DVM_RC_REDUCING) && \
+    (DVM_RESOLVER_CACHE != DVM_RC_EXPANDING)
+    /* nothing to do */
+    return NULL;
+#endif
+
+    /*
+     * We're looking for instructions that use "constant pool" entries for
+     * classes, methods, fields, and strings. Many field and method entries
+     * are optimized away, and many string constants are never accessed from
+     * code or annotations.
+     */
+    ScanResults* pResults = allocScanResults(pDvmDex->pDexFile);
+    if (pResults == NULL) {
+        /* BUG FIX: allocation failure was previously unchecked and led
+           to a NULL dereference in the scan; skip the rewrite instead */
+        return NULL;
+    }
+    forAllMethods(pDvmDex->pDexFile, markUsedConstants, pResults);
+
+    summarizeResults(pDvmDex, pResults);
+
+    /*
+     * Allocate and populate the index maps.
+     */
+    IndexMapSet* pIndexMapSet = createIndexMapSet(pDvmDex->pDexFile, pResults);
+#if DVM_RESOLVER_CACHE == DVM_RC_EXPANDING
+    if (pIndexMapSet != NULL) {
+        /*
+         * Rewrite the constants to use the reduced set.
+         */
+        forAllMethods(pDvmDex->pDexFile, updateUsedConstants, pIndexMapSet);
+    }
+#endif
+
+    freeScanResults(pResults);
+
+    return pIndexMapSet;
+}
+
diff --git a/vm/analysis/ReduceConstants.h b/vm/analysis/ReduceConstants.h
new file mode 100644
index 0000000..342e125
--- /dev/null
+++ b/vm/analysis/ReduceConstants.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * DEX constant-reduction declarations.
+ */
+#ifndef _DALVIK_REDUCECONSTANTS
+#define _DALVIK_REDUCECONSTANTS
+
+#define DVM_RC_DISABLED 0 /* no reduction, 1:1 map */
+#define DVM_RC_REDUCING 1 /* normal constants, reduced lookup table */
+#define DVM_RC_EXPANDING 2 /* reduced constants, expanded on resolve */
+#define DVM_RC_NO_CACHE 3 /* disable the cache (reduce to zero) */
+
+/*
+ * Identifies which DEX constant pool section an IndexMap covers; used to
+ * index IndexMapSet.map[].
+ */
+enum {
+    kMapClasses = 0,    /* class (type) constants */
+    kMapMethods = 1,    /* method constants */
+    kMapFields = 2,     /* field constants */
+    kMapStrings = 3,    /* string constants */
+
+    kNumIndexMaps       /* count of the entries above, not a real map */
+};
+
+struct DvmDex;
+
+#define kNoIndexMapping ((u2) -1)
+
+/*
+ * Map indices back to the original.
+ */
+typedef struct IndexMap {
+    int origCount;  /* original size; describes range of entries in map */
+    int newCount;   /* reduced size */
+    u2* mapToNew;   /* sparse map, from "orig" to "new"; presumably entries
+                       with no mapping hold kNoIndexMapping -- confirm */
+    u2* mapToOld;   /* dense map, from "new" back to "orig" */
+} IndexMap;
+
+/*
+ * One IndexMap per constant pool section, plus the serialized form that
+ * gets appended to the optimized DEX file. Freed as a unit by
+ * dvmFreeIndexMapSet().
+ */
+typedef struct IndexMapSet {
+    /* maps for the different sections */
+    IndexMap map[kNumIndexMaps];
+
+    /* data stream that gets appended to the optimized DEX file */
+    u4 chunkType;       /* type tag for the appended chunk */
+    int chunkDataLen;   /* length in bytes of chunkData */
+    u1* chunkData;      /* malloc'd serialized data; owned by this set */
+} IndexMapSet;
+
+/*
+ * Constant pool compaction.
+ *
+ * The caller is responsible for freeing the returned structure by
+ * calling dvmFreeIndexMapSet().
+ */
+IndexMapSet* dvmRewriteConstants(struct DvmDex* pDvmDex);
+
+/* free an index map set */
+void dvmFreeIndexMapSet(IndexMapSet* indexMapSet);
+
+#endif /*_DALVIK_REDUCECONSTANTS*/
diff --git a/vm/analysis/RegisterMap.c b/vm/analysis/RegisterMap.c
new file mode 100644
index 0000000..b02874a
--- /dev/null
+++ b/vm/analysis/RegisterMap.c
@@ -0,0 +1,1692 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// ** UNDER CONSTRUCTION **
+
+/*
+ * This code generates "register maps" for Dalvik bytecode. In a stack-based
+ * VM we might call these "stack maps". They are used to increase the
+ * precision in the garbage collector when scanning references in the
+ * interpreter thread stacks.
+ */
+#include "Dalvik.h"
+#include "analysis/CodeVerify.h"
+#include "analysis/RegisterMap.h"
+#include "libdex/DexCatch.h"
+#include "libdex/InstrUtils.h"
+
+#include <stddef.h>
+
+
+/*
+Notes on just-in-time RegisterMap generation
+
+Generating RegisterMap tables as part of verification is convenient because
+we generate most of what we need to know as part of doing the verify.
+The negative aspect of doing it this way is that we must store the
+result in the DEX file (if we're verifying ahead of time) or in memory
+(if verifying during class load) for every concrete non-native method,
+even if we never actually need the map during a GC.
+
+A simple but compact encoding of register map data increases the size of
+optimized DEX files by about 25%, so size considerations are important.
+
+We can instead generate the RegisterMap at the point where it is needed.
+In a typical application we only need to convert about 2% of the loaded
+methods, and we can generate type-precise roots reasonably quickly because
+(a) we know the method has already been verified and hence can make a
+lot of assumptions, and (b) we don't care what type of object a register
+holds, just whether or not it holds a reference, and hence can skip a
+lot of class resolution gymnastics.
+
+There are a couple of problems with this approach however. First, to
+get good performance we really want an implementation that is largely
+independent from the verifier, which means some duplication of effort.
+Second, we're dealing with post-dexopt code, which contains "quickened"
+instructions. We can't process those without either tracking type
+information (which slows us down) or storing additional data in the DEX
+file that allows us to reconstruct the original instructions (adds ~5%
+to the size of the ODEX).
+
+
+Implementation notes...
+
+Both type-precise and live-precise information can be generated knowing
+only whether or not a register holds a reference. We don't need to
+know what kind of reference or whether the object has been initialized.
+Not only can we skip many of the fancy steps in the verifier, we can
+initialize from simpler sources, e.g. the initial registers and return
+type are set from the "shorty" signature rather than the full signature.
+
+The short-term storage needs for just-in-time register map generation can
+be much lower because we can use a 1-byte SRegType instead of a 4-byte
+RegType. On the other hand, if we're not doing type-precise analysis
+in the verifier we only need to store register contents at every branch
+target, rather than every GC point (which are much more frequent).
+
+Whether it happens in the verifier or independently, because this is done
+with native heap allocations that may be difficult to return to the system,
+an effort should be made to minimize memory use.
+*/
+
+// fwd
+static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data);
+static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap);
+
+/*
+ * Generate the register map for a method that has just been verified
+ * (i.e. we're doing this as part of verification).
+ *
+ * For type-precise determination we have all the data we need, so we
+ * just need to encode it in some clever fashion.
+ *
+ * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
+ * The caller owns the result and releases it with dvmFreeRegisterMap().
+ */
+RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata)
+{
+    RegisterMap* pMap = NULL;
+    RegisterMap* pResult = NULL;
+    RegisterMapFormat format;
+    u1 regWidth;
+    u1* mapData;
+    int i, bytesForAddr, gcPointCount;
+    int bufSize;
+
+    /* one bit per register, rounded up to whole bytes per entry */
+    regWidth = (vdata->method->registersSize + 7) / 8;
+    if (vdata->insnsSize < 256) {
+        format = kFormatCompact8;
+        bytesForAddr = 1;
+    } else {
+        format = kFormatCompact16;
+        bytesForAddr = 2;
+    }
+
+    /*
+     * Count up the number of GC point instructions.
+     *
+     * NOTE: this does not automatically include the first instruction,
+     * since we don't count method entry as a GC point.
+     */
+    gcPointCount = 0;
+    for (i = 0; i < vdata->insnsSize; i++) {
+        if (dvmInsnIsGcPoint(vdata->insnFlags, i))
+            gcPointCount++;
+    }
+    if (gcPointCount >= 65536) {
+        /* we could handle this, but in practice we don't get near this */
+        LOGE("ERROR: register map can't handle %d gc points in one method\n",
+            gcPointCount);
+        goto bail;
+    }
+
+    /*
+     * Allocate a buffer to hold the map data: header plus one
+     * (address, bit vector) pair per GC point.
+     */
+    bufSize = offsetof(RegisterMap, data);
+    bufSize += gcPointCount * (bytesForAddr + regWidth);
+
+    LOGD("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)\n",
+        vdata->method->clazz->descriptor, vdata->method->name,
+        bytesForAddr, gcPointCount, regWidth, bufSize);
+
+    pMap = (RegisterMap*) malloc(bufSize);
+    if (pMap == NULL) {
+        /* allocation failure was previously unchecked */
+        goto bail;
+    }
+    pMap->format = format;
+    pMap->regWidth = regWidth;
+    pMap->numEntries = gcPointCount;
+
+    /*
+     * Populate it. Addresses are stored little-endian in the
+     * 16-bit format, matching the reader in verifyMap().
+     */
+    mapData = pMap->data;
+    for (i = 0; i < vdata->insnsSize; i++) {
+        if (dvmInsnIsGcPoint(vdata->insnFlags, i)) {
+            assert(vdata->addrRegs[i] != NULL);
+            if (format == kFormatCompact8) {
+                *mapData++ = i;
+            } else /*kFormatCompact16*/ {
+                *mapData++ = i & 0xff;
+                *mapData++ = i >> 8;
+            }
+            outputTypeVector(vdata->addrRegs[i], vdata->insnRegCount, mapData);
+            mapData += regWidth;
+        }
+    }
+
+    LOGI("mapData=%p pMap=%p bufSize=%d\n", mapData, pMap, bufSize);
+    assert(mapData - (const u1*) pMap == bufSize);
+
+#if 1
+    if (!verifyMap(vdata, pMap))
+        goto bail;
+#endif
+
+    pResult = pMap;
+
+bail:
+    if (pResult == NULL) {
+        /* failure path: don't leak the partially-built map */
+        free(pMap);
+    }
+    return pResult;
+}
+
+/*
+ * Release the storage held by a RegisterMap.
+ *
+ * "pMap" may be NULL.
+ */
+void dvmFreeRegisterMap(RegisterMap* pMap)
+{
+    /* free(NULL) is a well-defined no-op, so no explicit guard is needed */
+    free(pMap);
+}
+
+/*
+ * Decide whether a verifier RegType describes an object reference.
+ *
+ * Ordinarily we'd include kRegTypeZero in the "is it a reference"
+ * check. There's no value in doing so here, because we know the
+ * register can't hold anything but zero.
+ */
+static inline bool isReferenceType(RegType type)
+{
+    if (type == kRegTypeUninit)
+        return true;
+    return type > kRegTypeMAX;
+}
+
+/*
+ * Given a line of registers, output a bit vector that indicates whether
+ * or not the register holds a reference type (which could be null).
+ *
+ * We use '1' to indicate it's a reference, '0' for anything else (numeric
+ * value, uninitialized data, merge conflict). Register 0 will be found
+ * in the low bit of the first byte.
+ *
+ * Writes exactly ceil(insnRegCount/8) bytes into "data"; the caller must
+ * provide at least that much room.
+ */
+static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data)
+{
+    u1 val = 0;
+    int i;
+
+    for (i = 0; i < insnRegCount; i++) {
+        RegType type = *regs++;
+        val >>= 1;              /* bits enter at the top, drift to the bottom */
+        if (isReferenceType(type))
+            val |= 0x80;        /* set hi bit */
+
+        if ((i & 0x07) == 7)
+            *data++ = val;      /* accumulated 8 registers; flush the byte */
+    }
+    if ((i & 0x07) != 0) {
+        /* flush bits from last byte; shift right-aligns the partial group */
+        val >>= 8 - (i & 0x07);
+        *data++ = val;
+    }
+}
+
+/*
+ * Double-check the map.
+ *
+ * We run through all of the data in the map, and compare it to the
+ * register data produced by the verifier. Returns "true" on a match.
+ */
+static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap)
+{
+    const u1* data = pMap->data;
+    int ent;
+
+    for (ent = 0; ent < pMap->numEntries; ent++) {
+        int addr;
+
+        switch (pMap->format) {
+        case kFormatCompact8:
+            addr = *data++;
+            break;
+        case kFormatCompact16:
+            /* little-endian, matching the writer in dvmGenerateRegisterMapV */
+            addr = *data++;
+            addr |= (*data++) << 8;
+            break;
+        default:
+            /* shouldn't happen */
+            LOGE("GLITCH: bad format (%d)\n", pMap->format);
+            dvmAbort();
+            /* unreachable if dvmAbort doesn't return; keeps "addr" from
+             * being used uninitialized if it somehow does */
+            return false;
+        }
+
+        const RegType* regs = vdata->addrRegs[addr];
+        if (regs == NULL) {
+            LOGE("GLITCH: addr %d has no data\n", addr);
+            return false;
+        }
+
+        /* initialized: the first "val >>= 1" below executes before the
+         * first byte is loaded, and shifting garbage is undefined */
+        u1 val = 0;
+        int i;
+
+        for (i = 0; i < vdata->method->registersSize; i++) {
+            bool bitIsRef, regIsRef;
+
+            val >>= 1;
+            if ((i & 0x07) == 0) {
+                /* load next byte of data */
+                val = *data++;
+            }
+
+            bitIsRef = val & 0x01;
+
+            RegType type = regs[i];
+            regIsRef = isReferenceType(type);
+
+            if (bitIsRef != regIsRef) {
+                LOGE("GLITCH: addr %d reg %d: bit=%d reg=%d(%d)\n",
+                    addr, i, bitIsRef, regIsRef, type);
+                return false;
+            }
+        }
+
+        /* print the map as a binary string */
+        if (false) {
+            char outBuf[vdata->method->registersSize +1];
+            for (i = 0; i < vdata->method->registersSize; i++) {
+                if (isReferenceType(regs[i])) {
+                    outBuf[i] = '1';
+                } else {
+                    outBuf[i] = '0';
+                }
+            }
+            outBuf[i] = '\0';
+            LOGD(" %04d %s\n", addr, outBuf);
+        }
+    }
+
+    return true;
+}
+
+
+/*
+ * ===========================================================================
+ * Just-in-time generation
+ * ===========================================================================
+ */
+
+#if 0 /* incomplete implementation; may be removed entirely in the future */
+
+/*
+ * This is like RegType in the verifier, but simplified. It holds a value
+ * from the reg type enum, or kRegTypeReference.
+ */
+typedef u1 SRegType;
+#define kRegTypeReference kRegTypeMAX
+
+/*
+ * We need an extra "pseudo register" to hold the return type briefly. It
+ * can be category 1 or 2, so we need two slots.
+ */
+#define kExtraRegs 2
+#define RESULT_REGISTER(_insnRegCountPlus) (_insnRegCountPlus - kExtraRegs)
+
+/*
+ * Working state for just-in-time register map generation; one instance
+ * covers a single method.
+ */
+typedef struct WorkState {
+    /*
+     * The method we're working on.
+     */
+    const Method* method;
+
+    /*
+     * Number of instructions (code units) in the method.
+     */
+    int insnsSize;
+
+    /*
+     * Number of registers we track for each instruction. This is equal
+     * to the method's declared "registersSize" plus kExtraRegs.
+     */
+    int insnRegCountPlus;
+
+    /*
+     * Instruction widths and flags, one entry per code unit.
+     */
+    InsnFlags* insnFlags;
+
+    /*
+     * Array of SRegType arrays, one entry per code unit. We only need
+     * to create an entry when an instruction starts at this address.
+     * We can further reduce this to instructions that are GC points.
+     *
+     * We could just go ahead and allocate one per code unit, but for
+     * larger methods that can represent a significant bit of short-term
+     * storage.
+     *
+     * Non-NULL entries point into "regAlloc"; they are not individually
+     * allocated or freed.
+     */
+    SRegType** addrRegs;
+
+    /*
+     * A single large alloc, with all of the storage needed for addrRegs.
+     */
+    SRegType* regAlloc;
+} WorkState;
+
+// fwd
+static bool generateMap(WorkState* pState, RegisterMap* pMap);
+static bool analyzeMethod(WorkState* pState);
+static bool handleInstruction(WorkState* pState, SRegType* workRegs,\
+ int insnIdx, int* pStartGuess);
+static void updateRegisters(WorkState* pState, int nextInsn,\
+ const SRegType* workRegs);
+
+
+/*
+ * Set instruction flags: code-unit widths, "try" region markers, branch
+ * targets, and GC points.
+ *
+ * On success, stores the number of GC-point code units in "*pGcPointCount"
+ * and returns "true".
+ */
+static bool setInsnFlags(WorkState* pState, int* pGcPointCount)
+{
+    const Method* meth = pState->method;
+    InsnFlags* insnFlags = pState->insnFlags;
+    int insnsSize = pState->insnsSize;
+    const u2* insns = meth->insns;
+    int gcPointCount = 0;
+    int offset;
+
+    /* set the widths */
+    if (!dvmComputeCodeWidths(meth, pState->insnFlags, NULL))
+        return false;
+
+    /* mark "try" regions and exception handler branch targets */
+    if (!dvmSetTryFlags(meth, pState->insnFlags))
+        return false;
+
+    /* the start of the method is a "branch target" */
+    dvmInsnSetBranchTarget(insnFlags, 0, true);
+
+    /*
+     * Run through the instructions, looking for switches and branches.
+     * Mark their targets.
+     *
+     * We don't really need to "check" these instructions -- the verifier
+     * already did that -- but the additional overhead isn't significant
+     * enough to warrant making a second copy of the "Check" function.
+     *
+     * Mark and count GC points while we're at it.
+     *
+     * NOTE(review): this advances one code unit at a time, so payload
+     * words of multi-unit instructions are also decoded as opcodes --
+     * confirm that's benign for the flag/count computation.
+     */
+    for (offset = 0; offset < insnsSize; offset++) {
+        /* const: this mask never changes, so don't leave it mutable */
+        static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
+            kInstrCanThrow | kInstrCanReturn;
+        u1 opcode = insns[offset] & 0xff;
+        InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode);
+
+        if (opFlags & kInstrCanBranch) {
+            if (!dvmCheckBranchTarget(meth, insnFlags, offset, true))
+                return false;
+        }
+        if (opFlags & kInstrCanSwitch) {
+            if (!dvmCheckSwitchTargets(meth, insnFlags, offset))
+                return false;
+        }
+
+        if ((opFlags & gcMask) != 0) {
+            dvmInsnSetGcPoint(pState->insnFlags, offset, true);
+            gcPointCount++;
+        }
+    }
+
+    *pGcPointCount = gcPointCount;
+    return true;
+}
+
+/*
+ * Generate the register map for a method.
+ *
+ * Returns a pointer to newly-allocated storage, or NULL on failure.
+ * The caller owns the result and releases it with dvmFreeRegisterMap().
+ */
+RegisterMap* dvmGenerateRegisterMap(const Method* meth)
+{
+    WorkState* pState = NULL;
+    RegisterMap* pMap = NULL;
+    RegisterMap* result = NULL;
+    SRegType* regPtr;
+
+    pState = (WorkState*) calloc(1, sizeof(WorkState));
+    if (pState == NULL)
+        goto bail;
+
+    pMap = (RegisterMap*) calloc(1, sizeof(RegisterMap));
+    if (pMap == NULL)
+        goto bail;
+
+    pState->method = meth;
+    pState->insnsSize = dvmGetMethodInsnsSize(meth);
+    pState->insnRegCountPlus = meth->registersSize + kExtraRegs;
+
+    pState->insnFlags = calloc(sizeof(InsnFlags), pState->insnsSize);
+    pState->addrRegs = calloc(sizeof(SRegType*), pState->insnsSize);
+    if (pState->insnFlags == NULL || pState->addrRegs == NULL) {
+        /* previously unchecked; setInsnFlags would have scribbled on NULL */
+        goto bail;
+    }
+
+    /*
+     * Set flags on instructions, and calculate the number of code units
+     * that happen to be GC points.
+     */
+    int gcPointCount;
+    if (!setInsnFlags(pState, &gcPointCount))
+        goto bail;
+
+    if (gcPointCount == 0) {
+        /* the method doesn't allocate or call, and never returns? unlikely */
+        LOG_VFY_METH(meth, "Found do-nothing method\n");
+        goto bail;
+    }
+
+    /*
+     * One register line of insnRegCountPlus entries per GC point. (Was
+     * sized insnsSize * gcPointCount, which is wrong whenever the
+     * register count differs from the code-unit count.)
+     */
+    pState->regAlloc = (SRegType*)
+        calloc(sizeof(SRegType), pState->insnRegCountPlus * gcPointCount);
+    if (pState->regAlloc == NULL)
+        goto bail;
+    regPtr = pState->regAlloc;
+
+    /*
+     * For each instruction that is a GC point, set a pointer into the
+     * regAlloc buffer.
+     */
+    int offset;
+    for (offset = 0; offset < pState->insnsSize; offset++) {
+        if (dvmInsnIsGcPoint(pState->insnFlags, offset)) {
+            pState->addrRegs[offset] = regPtr;
+            regPtr += pState->insnRegCountPlus;
+        }
+    }
+    assert(regPtr - pState->regAlloc ==
+        pState->insnRegCountPlus * gcPointCount);
+    /* NOTE(review): assumes instruction 0 is a GC point, which setInsnFlags
+     * does not guarantee -- setTypesFromSignature depends on it; confirm */
+    assert(pState->addrRegs[0] != NULL);
+
+    /*
+     * Compute the register map.
+     */
+    if (!generateMap(pState, pMap))
+        goto bail;
+
+    /* success; hand ownership of pMap to the caller */
+    result = pMap;
+    pMap = NULL;
+
+bail:
+    if (pState != NULL) {
+        free(pState->insnFlags);
+        free(pState->addrRegs);
+        free(pState->regAlloc);
+        free(pState);
+    }
+    if (pMap != NULL)
+        dvmFreeRegisterMap(pMap);
+    return result;
+}
+
+/*
+ * Release the storage associated with a RegisterMap.
+ *
+ * "pMap" may be NULL.
+ */
+void dvmFreeRegisterMap(RegisterMap* pMap)
+{
+    if (pMap == NULL)
+        return;
+
+    /* the map is a single calloc'd block; this free was missing, so every
+     * map produced by dvmGenerateRegisterMap leaked */
+    free(pMap);
+}
+
+
+/*
+ * Create the RegisterMap using the provided state.
+ *
+ * Returns "false" if method analysis fails.
+ */
+static bool generateMap(WorkState* pState, RegisterMap* pMap)
+{
+    /*
+     * Analyze the method and store the results in WorkState.
+     */
+    if (!analyzeMethod(pState))
+        return false;
+
+    /*
+     * Convert the analyzed data into a RegisterMap.
+     */
+    // TODO
+
+    return true;
+}
+
+/*
+ * Set the register types for the method arguments. We can pull the values
+ * out of the "shorty" signature.
+ *
+ * Returns "false" if the signature holds an unexpected type character.
+ */
+static bool setTypesFromSignature(WorkState* pState)
+{
+    const Method* meth = pState->method;
+    int argReg = meth->registersSize - meth->insSize;   /* first arg reg */
+    SRegType* pRegs = pState->addrRegs[0];
+    SRegType* pCurReg = &pRegs[argReg];
+    const char* ccp;
+
+    /*
+     * Include "this" pointer, if appropriate.
+     */
+    if (!dvmIsStaticMethod(meth)) {
+        *pCurReg++ = kRegTypeReference;
+    }
+
+    ccp = meth->shorty +1;      /* skip first byte, which holds return type */
+    while (*ccp != 0) {
+        switch (*ccp) {
+        case 'L':
+        //case '[':
+            *pCurReg++ = kRegTypeReference;
+            break;
+        case 'Z':
+            *pCurReg++ = kRegTypeBoolean;
+            break;
+        case 'C':
+            *pCurReg++ = kRegTypeChar;
+            break;
+        case 'B':
+            *pCurReg++ = kRegTypeByte;
+            break;
+        case 'I':
+            *pCurReg++ = kRegTypeInteger;
+            break;
+        case 'S':
+            *pCurReg++ = kRegTypeShort;
+            break;
+        case 'F':
+            *pCurReg++ = kRegTypeFloat;
+            break;
+        case 'D':
+            /* category-2 values occupy two adjacent registers */
+            *pCurReg++ = kRegTypeDoubleLo;
+            *pCurReg++ = kRegTypeDoubleHi;
+            break;
+        case 'J':
+            *pCurReg++ = kRegTypeLongLo;
+            *pCurReg++ = kRegTypeLongHi;
+            break;
+        default:
+            assert(false);
+            return false;
+        }
+        ccp++;      /* advance; without this the loop never terminated */
+    }
+
+    assert(pCurReg - pRegs == meth->insSize);
+    return true;
+}
+
+/*
+ * Find the start of the register set for the specified instruction in
+ * the current method.
+ *
+ * Returns NULL when no register line was allocated for this address.
+ */
+static inline SRegType* getRegisterLine(const WorkState* pState, int insnIdx)
+{
+    return pState->addrRegs[insnIdx];
+}
+
+/*
+ * Copy a set of registers. The regions must not overlap (memcpy).
+ */
+static inline void copyRegisters(SRegType* dst, const SRegType* src,
+    int numRegs)
+{
+    memcpy(dst, src, numRegs * sizeof(SRegType));
+}
+
+/*
+ * Compare a set of registers. Returns 0 if they match.
+ */
+static inline int compareRegisters(const SRegType* src1, const SRegType* src2,
+    int numRegs)
+{
+    return memcmp(src1, src2, numRegs * sizeof(SRegType));
+}
+
+/*
+ * Run through the instructions repeatedly until we have exercised all
+ * possible paths.
+ *
+ * Standard dataflow worklist loop: an instruction's "changed" flag means
+ * its entry register state was updated and it must be (re)evaluated; we
+ * iterate until no flags remain set.
+ *
+ * Returns "false" if signature parsing or instruction handling fails.
+ */
+static bool analyzeMethod(WorkState* pState)
+{
+    const Method* meth = pState->method;
+    /* VLA holding the working copy of one register line */
+    SRegType workRegs[pState->insnRegCountPlus];
+    InsnFlags* insnFlags = pState->insnFlags;
+    int insnsSize = pState->insnsSize;
+    int insnIdx, startGuess;
+    bool result = false;
+
+    /*
+     * Initialize the types of the registers that correspond to method
+     * arguments.
+     */
+    if (!setTypesFromSignature(pState))
+        goto bail;
+
+    /*
+     * Mark the first instruction as "changed" to seed the worklist.
+     */
+    dvmInsnSetChanged(insnFlags, 0, true);
+    startGuess = 0;
+
+    if (true) {
+        IF_LOGI() {
+            char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
+            LOGI("Now mapping: %s.%s %s (ins=%d regs=%d)\n",
+                meth->clazz->descriptor, meth->name, desc,
+                meth->insSize, meth->registersSize);
+            LOGI(" ------ [0 4 8 12 16 20 24 28 32 36\n");
+            free(desc);
+        }
+    }
+
+    /*
+     * Continue until no instructions are marked "changed".
+     */
+    while (true) {
+        /*
+         * Find the first marked one. Use "startGuess" as a way to find
+         * one quickly.
+         */
+        for (insnIdx = startGuess; insnIdx < insnsSize; insnIdx++) {
+            if (dvmInsnIsChanged(insnFlags, insnIdx))
+                break;
+        }
+
+        if (insnIdx == insnsSize) {
+            if (startGuess != 0) {
+                /* try again, starting from the top */
+                startGuess = 0;
+                continue;
+            } else {
+                /* all flags are clear */
+                break;
+            }
+        }
+
+        /*
+         * We carry the working set of registers from instruction to
+         * instruction. If this address can be the target of a branch
+         * (or throw) instruction, or if we're skipping around chasing
+         * "changed" flags, we need to load the set of registers from
+         * the table.
+         *
+         * Because we always prefer to continue on to the next instruction,
+         * we should never have a situation where we have a stray
+         * "changed" flag set on an instruction that isn't a branch target.
+         */
+        if (dvmInsnIsBranchTarget(insnFlags, insnIdx)) {
+            SRegType* insnRegs = getRegisterLine(pState, insnIdx);
+            assert(insnRegs != NULL);
+            copyRegisters(workRegs, insnRegs, pState->insnRegCountPlus);
+
+        } else {
+#ifndef NDEBUG
+            /*
+             * Sanity check: retrieve the stored register line (assuming
+             * a full table) and make sure it actually matches.
+             */
+            SRegType* insnRegs = getRegisterLine(pState, insnIdx);
+            if (insnRegs != NULL &&
+                compareRegisters(workRegs, insnRegs,
+                    pState->insnRegCountPlus) != 0)
+            {
+                char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
+                LOG_VFY("HUH? workRegs diverged in %s.%s %s\n",
+                    meth->clazz->descriptor, meth->name, desc);
+                free(desc);
+            }
+#endif
+        }
+
+        /*
+         * Update the register sets altered by this instruction; this also
+         * propagates state to successors and may update "startGuess".
+         */
+        if (!handleInstruction(pState, workRegs, insnIdx, &startGuess)) {
+            goto bail;
+        }
+
+        dvmInsnSetVisited(insnFlags, insnIdx, true);
+        dvmInsnSetChanged(insnFlags, insnIdx, false);
+    }
+
+    // TODO - add dead code scan to help validate this code?
+
+    result = true;
+
+bail:
+    return result;
+}
+
+/*
+ * Get a pointer to the method being invoked.
+ *
+ * Returns NULL on failure, after logging a descriptive message.
+ */
+static Method* getInvokedMethod(const Method* meth,
+    const DecodedInstruction* pDecInsn, MethodType methodType)
+{
+    Method* resMethod;
+
+    /*
+     * Resolve the method. This could be an abstract or concrete method
+     * depending on what sort of call we're making.
+     */
+    if (methodType == METHOD_INTERFACE) {
+        resMethod = dvmOptResolveInterfaceMethod(meth->clazz, pDecInsn->vB);
+    } else {
+        resMethod = dvmOptResolveMethod(meth->clazz, pDecInsn->vB, methodType);
+    }
+    if (resMethod == NULL) {
+        /* failed; print a meaningful failure message */
+        DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+        const DexMethodId* pMethodId;
+        const char* methodName;
+        char* methodDesc;
+        const char* classDescriptor;
+
+        pMethodId = dexGetMethodId(pDexFile, pDecInsn->vB);
+        methodName = dexStringById(pDexFile, pMethodId->nameIdx);
+        methodDesc = dexCopyDescriptorFromMethodId(pDexFile, pMethodId);
+        classDescriptor = dexStringByTypeIdx(pDexFile, pMethodId->classIdx);
+
+        LOG_VFY("VFY: unable to resolve %s method %u: %s.%s %s\n",
+            dvmMethodTypeStr(methodType), pDecInsn->vB,
+            classDescriptor, methodName, methodDesc);
+        free(methodDesc);
+        return NULL;
+    }
+
+    return resMethod;
+}
+
+/*
+ * Return the register type for the method. Since we don't care about
+ * the actual type, we can just look at the "shorty" signature.
+ *
+ * Returns kRegTypeUnknown for "void".
+ */
+static SRegType getMethodReturnType(const Method* meth)
+{
+    switch (meth->shorty[0]) {
+    case 'V':   return kRegTypeUnknown;     /* no return value */
+    case 'Z':   return kRegTypeBoolean;
+    case 'B':   return kRegTypeByte;
+    case 'C':   return kRegTypeChar;
+    case 'S':   return kRegTypeShort;
+    case 'I':   return kRegTypeInteger;
+    case 'F':   return kRegTypeFloat;
+    case 'J':   return kRegTypeLongLo;      /* low half; hi half is implied */
+    case 'D':   return kRegTypeDoubleLo;
+    case 'L':
+    //case '[':
+        return kRegTypeReference;
+    default:
+        /* we verified signature return type earlier, so this is impossible */
+        assert(false);
+        return kRegTypeConflict;
+    }
+}
+
+/*
+ * Copy a category 1 register (a single slot).
+ */
+static inline void copyRegister1(SRegType* insnRegs, u4 vdst, u4 vsrc)
+{
+    insnRegs[vdst] = insnRegs[vsrc];
+}
+
+/*
+ * Copy a category 2 register (a value spanning two adjacent slots).
+ * Note the source and destination may overlap.
+ */
+static inline void copyRegister2(SRegType* insnRegs, u4 vdst, u4 vsrc)
+{
+    //memmove(&insnRegs[vdst], &insnRegs[vsrc], sizeof(SRegType) * 2);
+    /* read both halves before writing, in case vdst overlaps vsrc */
+    SRegType r1 = insnRegs[vsrc];
+    SRegType r2 = insnRegs[vsrc+1];
+    insnRegs[vdst] = r1;
+    insnRegs[vdst+1] = r2;
+}
+
+/*
+ * Set the type of a category 1 register.
+ */
+static inline void setRegisterType(SRegType* insnRegs, u4 vdst, SRegType type)
+{
+    insnRegs[vdst] = type;
+}
+
+/*
+ * Decode the specified instruction and update the register info.
+ */
+static bool handleInstruction(WorkState* pState, SRegType* workRegs,
+ int insnIdx, int* pStartGuess)
+{
+ const Method* meth = pState->method;
+ const u2* insns = meth->insns + insnIdx;
+ InsnFlags* insnFlags = pState->insnFlags;
+ bool result = false;
+
+ /*
+ * Once we finish decoding the instruction, we need to figure out where
+ * we can go from here. There are three possible ways to transfer
+ * control to another statement:
+ *
+ * (1) Continue to the next instruction. Applies to all but
+ * unconditional branches, method returns, and exception throws.
+ * (2) Branch to one or more possible locations. Applies to branches
+ * and switch statements.
+ * (3) Exception handlers. Applies to any instruction that can
+ * throw an exception that is handled by an encompassing "try"
+ * block. (We simplify this to be any instruction that can
+ * throw any exception.)
+ *
+ * We can also return, in which case there is no successor instruction
+ * from this point.
+ *
+ * The behavior can be determined from the InstrFlags.
+ */
+ DecodedInstruction decInsn;
+ SRegType entryRegs[pState->insnRegCountPlus];
+ const int insnRegCountPlus = pState->insnRegCountPlus;
+ bool justSetResult = false;
+ int branchTarget = 0;
+ SRegType tmpType;
+
+ dexDecodeInstruction(gDvm.instrFormat, insns, &decInsn);
+ const int nextFlags = dexGetInstrFlags(gDvm.instrFlags, decInsn.opCode);
+
+ /*
+ * Make a copy of the previous register state. If the instruction
+ * throws an exception, we merge *this* into the destination rather
+ * than workRegs, because we don't want the result from the "successful"
+ * code path (e.g. a check-cast that "improves" a type) to be visible
+ * to the exception handler.
+ */
+ if ((nextFlags & kInstrCanThrow) != 0 && dvmInsnIsInTry(insnFlags, insnIdx))
+ {
+ copyRegisters(entryRegs, workRegs, insnRegCountPlus);
+ }
+
+ switch (decInsn.opCode) {
+ case OP_NOP:
+ break;
+
+ case OP_MOVE:
+ case OP_MOVE_FROM16:
+ case OP_MOVE_16:
+ case OP_MOVE_OBJECT:
+ case OP_MOVE_OBJECT_FROM16:
+ case OP_MOVE_OBJECT_16:
+ copyRegister1(workRegs, decInsn.vA, decInsn.vB);
+ break;
+ case OP_MOVE_WIDE:
+ case OP_MOVE_WIDE_FROM16:
+ case OP_MOVE_WIDE_16:
+ copyRegister2(workRegs, decInsn.vA, decInsn.vB);
+ break;
+
+ /*
+ * The move-result instructions copy data out of a "pseudo-register"
+ * with the results from the last method invocation. In practice we
+ * might want to hold the result in an actual CPU register, so the
+ * Dalvik spec requires that these only appear immediately after an
+ * invoke or filled-new-array.
+ *
+ * These calls invalidate the "result" register. (This is now
+ * redundant with the reset done below, but it can make the debug info
+ * easier to read in some cases.)
+ */
+ case OP_MOVE_RESULT:
+ case OP_MOVE_RESULT_OBJECT:
+ copyRegister1(workRegs, decInsn.vA, RESULT_REGISTER(insnRegCountPlus));
+ break;
+ case OP_MOVE_RESULT_WIDE:
+ copyRegister2(workRegs, decInsn.vA, RESULT_REGISTER(insnRegCountPlus));
+ break;
+
+ case OP_MOVE_EXCEPTION:
+ /*
+ * This statement can only appear as the first instruction in an
+ * exception handler (though not all exception handlers need to
+ * have one of these). We verify that as part of extracting the
+ * exception type from the catch block list.
+ */
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+
+ case OP_RETURN_VOID:
+ case OP_RETURN:
+ case OP_RETURN_WIDE:
+ case OP_RETURN_OBJECT:
+ break;
+
+ case OP_CONST_4:
+ case OP_CONST_16:
+ case OP_CONST:
+ /* could be boolean, int, float, or a null reference */
+ setRegisterType(workRegs, decInsn.vA,
+ dvmDetermineCat1Const((s4)decInsn.vB));
+ break;
+ case OP_CONST_HIGH16:
+ /* could be boolean, int, float, or a null reference */
+ setRegisterType(workRegs, decInsn.vA,
+ dvmDetermineCat1Const((s4) decInsn.vB << 16));
+ break;
+ case OP_CONST_WIDE_16:
+ case OP_CONST_WIDE_32:
+ case OP_CONST_WIDE:
+ case OP_CONST_WIDE_HIGH16:
+ /* could be long or double; default to long and allow conversion */
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_CONST_STRING:
+ case OP_CONST_STRING_JUMBO:
+ case OP_CONST_CLASS:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+
+ case OP_MONITOR_ENTER:
+ case OP_MONITOR_EXIT:
+ break;
+
+ case OP_CHECK_CAST:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+ case OP_INSTANCE_OF:
+ /* result is boolean */
+ setRegisterType(workRegs, decInsn.vA, kRegTypeBoolean);
+ break;
+
+ case OP_ARRAY_LENGTH:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+
+ case OP_NEW_INSTANCE:
+ case OP_NEW_ARRAY:
+ /* add the new uninitialized reference to the register ste */
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+ case OP_FILLED_NEW_ARRAY:
+ case OP_FILLED_NEW_ARRAY_RANGE:
+ setRegisterType(workRegs, RESULT_REGISTER(insnRegCountPlus),
+ kRegTypeReference);
+ justSetResult = true;
+ break;
+
+ case OP_CMPL_FLOAT:
+ case OP_CMPG_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeBoolean);
+ break;
+ case OP_CMPL_DOUBLE:
+ case OP_CMPG_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeBoolean);
+ break;
+ case OP_CMP_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeBoolean);
+ break;
+
+ case OP_THROW:
+ case OP_GOTO:
+ case OP_GOTO_16:
+ case OP_GOTO_32:
+ case OP_PACKED_SWITCH:
+ case OP_SPARSE_SWITCH:
+ break;
+
+ case OP_FILL_ARRAY_DATA:
+ break;
+
+ case OP_IF_EQ:
+ case OP_IF_NE:
+ case OP_IF_LT:
+ case OP_IF_GE:
+ case OP_IF_GT:
+ case OP_IF_LE:
+ case OP_IF_EQZ:
+ case OP_IF_NEZ:
+ case OP_IF_LTZ:
+ case OP_IF_GEZ:
+ case OP_IF_GTZ:
+ case OP_IF_LEZ:
+ break;
+
+ case OP_AGET:
+ tmpType = kRegTypeInteger;
+ goto aget_1nr_common;
+ case OP_AGET_BOOLEAN:
+ tmpType = kRegTypeBoolean;
+ goto aget_1nr_common;
+ case OP_AGET_BYTE:
+ tmpType = kRegTypeByte;
+ goto aget_1nr_common;
+ case OP_AGET_CHAR:
+ tmpType = kRegTypeChar;
+ goto aget_1nr_common;
+ case OP_AGET_SHORT:
+ tmpType = kRegTypeShort;
+ goto aget_1nr_common;
+aget_1nr_common:
+ setRegisterType(workRegs, decInsn.vA, tmpType);
+ break;
+
+ case OP_AGET_WIDE:
+ /*
+ * We know this is either long or double, and we don't really
+ * discriminate between those during verification, so we
+ * call it a long.
+ */
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+
+ case OP_AGET_OBJECT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+
+ case OP_APUT:
+ case OP_APUT_BOOLEAN:
+ case OP_APUT_BYTE:
+ case OP_APUT_CHAR:
+ case OP_APUT_SHORT:
+ case OP_APUT_WIDE:
+ case OP_APUT_OBJECT:
+ break;
+
+ case OP_IGET:
+ tmpType = kRegTypeInteger;
+ goto iget_1nr_common;
+ case OP_IGET_BOOLEAN:
+ tmpType = kRegTypeBoolean;
+ goto iget_1nr_common;
+ case OP_IGET_BYTE:
+ tmpType = kRegTypeByte;
+ goto iget_1nr_common;
+ case OP_IGET_CHAR:
+ tmpType = kRegTypeChar;
+ goto iget_1nr_common;
+ case OP_IGET_SHORT:
+ tmpType = kRegTypeShort;
+ goto iget_1nr_common;
+iget_1nr_common:
+ setRegisterType(workRegs, decInsn.vA, tmpType);
+ break;
+
+ case OP_IGET_WIDE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+
+ case OP_IGET_OBJECT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+
+ case OP_IPUT:
+ case OP_IPUT_BOOLEAN:
+ case OP_IPUT_BYTE:
+ case OP_IPUT_CHAR:
+ case OP_IPUT_SHORT:
+ case OP_IPUT_WIDE:
+ case OP_IPUT_OBJECT:
+ break;
+
+ case OP_SGET:
+ tmpType = kRegTypeInteger;
+ goto sget_1nr_common;
+ case OP_SGET_BOOLEAN:
+ tmpType = kRegTypeBoolean;
+ goto sget_1nr_common;
+ case OP_SGET_BYTE:
+ tmpType = kRegTypeByte;
+ goto sget_1nr_common;
+ case OP_SGET_CHAR:
+ tmpType = kRegTypeChar;
+ goto sget_1nr_common;
+ case OP_SGET_SHORT:
+ tmpType = kRegTypeShort;
+ goto sget_1nr_common;
+sget_1nr_common:
+ setRegisterType(workRegs, decInsn.vA, tmpType);
+ break;
+
+ case OP_SGET_WIDE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+
+ case OP_SGET_OBJECT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeReference);
+ break;
+
+ case OP_SPUT:
+ case OP_SPUT_BOOLEAN:
+ case OP_SPUT_BYTE:
+ case OP_SPUT_CHAR:
+ case OP_SPUT_SHORT:
+ case OP_SPUT_WIDE:
+ case OP_SPUT_OBJECT:
+ break;
+
+ case OP_INVOKE_VIRTUAL:
+ case OP_INVOKE_VIRTUAL_RANGE:
+ case OP_INVOKE_SUPER:
+ case OP_INVOKE_SUPER_RANGE:
+ {
+ Method* calledMethod;
+
+ calledMethod = getInvokedMethod(meth, &decInsn, METHOD_VIRTUAL);
+ if (calledMethod == NULL)
+ goto bail;
+ setRegisterType(workRegs, RESULT_REGISTER(insnRegCountPlus),
+ getMethodReturnType(calledMethod));
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_DIRECT:
+ case OP_INVOKE_DIRECT_RANGE:
+ {
+ Method* calledMethod;
+
+ calledMethod = getInvokedMethod(meth, &decInsn, METHOD_DIRECT);
+ if (calledMethod == NULL)
+ goto bail;
+ setRegisterType(workRegs, RESULT_REGISTER(insnRegCountPlus),
+ getMethodReturnType(calledMethod));
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_STATIC:
+ case OP_INVOKE_STATIC_RANGE:
+ {
+ Method* calledMethod;
+
+ calledMethod = getInvokedMethod(meth, &decInsn, METHOD_STATIC);
+ if (calledMethod == NULL)
+ goto bail;
+ setRegisterType(workRegs, RESULT_REGISTER(insnRegCountPlus),
+ getMethodReturnType(calledMethod));
+ justSetResult = true;
+ }
+ break;
+ case OP_INVOKE_INTERFACE:
+ case OP_INVOKE_INTERFACE_RANGE:
+ {
+ Method* absMethod;
+
+ absMethod = getInvokedMethod(meth, &decInsn, METHOD_INTERFACE);
+ if (absMethod == NULL)
+ goto bail;
+ setRegisterType(workRegs, RESULT_REGISTER(insnRegCountPlus),
+ getMethodReturnType(absMethod));
+ justSetResult = true;
+ }
+ break;
+
+ case OP_NEG_INT:
+ case OP_NOT_INT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_NEG_LONG:
+ case OP_NOT_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_NEG_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_NEG_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_INT_TO_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_INT_TO_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_INT_TO_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_LONG_TO_INT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_LONG_TO_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_LONG_TO_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_FLOAT_TO_INT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_FLOAT_TO_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_FLOAT_TO_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_DOUBLE_TO_INT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_DOUBLE_TO_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_DOUBLE_TO_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_INT_TO_BYTE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeByte);
+ break;
+ case OP_INT_TO_CHAR:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeChar);
+ break;
+ case OP_INT_TO_SHORT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeShort);
+ break;
+
+ case OP_ADD_INT:
+ case OP_SUB_INT:
+ case OP_MUL_INT:
+ case OP_REM_INT:
+ case OP_DIV_INT:
+ case OP_SHL_INT:
+ case OP_SHR_INT:
+ case OP_USHR_INT:
+ case OP_AND_INT:
+ case OP_OR_INT:
+ case OP_XOR_INT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_ADD_LONG:
+ case OP_SUB_LONG:
+ case OP_MUL_LONG:
+ case OP_DIV_LONG:
+ case OP_REM_LONG:
+ case OP_AND_LONG:
+ case OP_OR_LONG:
+ case OP_XOR_LONG:
+ case OP_SHL_LONG:
+ case OP_SHR_LONG:
+ case OP_USHR_LONG:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_ADD_FLOAT:
+ case OP_SUB_FLOAT:
+ case OP_MUL_FLOAT:
+ case OP_DIV_FLOAT:
+ case OP_REM_FLOAT:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_ADD_DOUBLE:
+ case OP_SUB_DOUBLE:
+ case OP_MUL_DOUBLE:
+ case OP_DIV_DOUBLE:
+ case OP_REM_DOUBLE:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_ADD_INT_2ADDR:
+ case OP_SUB_INT_2ADDR:
+ case OP_MUL_INT_2ADDR:
+ case OP_REM_INT_2ADDR:
+ case OP_SHL_INT_2ADDR:
+ case OP_SHR_INT_2ADDR:
+ case OP_USHR_INT_2ADDR:
+ case OP_AND_INT_2ADDR:
+ case OP_OR_INT_2ADDR:
+ case OP_XOR_INT_2ADDR:
+ case OP_DIV_INT_2ADDR:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+ case OP_ADD_LONG_2ADDR:
+ case OP_SUB_LONG_2ADDR:
+ case OP_MUL_LONG_2ADDR:
+ case OP_DIV_LONG_2ADDR:
+ case OP_REM_LONG_2ADDR:
+ case OP_AND_LONG_2ADDR:
+ case OP_OR_LONG_2ADDR:
+ case OP_XOR_LONG_2ADDR:
+ case OP_SHL_LONG_2ADDR:
+ case OP_SHR_LONG_2ADDR:
+ case OP_USHR_LONG_2ADDR:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeLongLo);
+ break;
+ case OP_ADD_FLOAT_2ADDR:
+ case OP_SUB_FLOAT_2ADDR:
+ case OP_MUL_FLOAT_2ADDR:
+ case OP_DIV_FLOAT_2ADDR:
+ case OP_REM_FLOAT_2ADDR:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeFloat);
+ break;
+ case OP_ADD_DOUBLE_2ADDR:
+ case OP_SUB_DOUBLE_2ADDR:
+ case OP_MUL_DOUBLE_2ADDR:
+ case OP_DIV_DOUBLE_2ADDR:
+ case OP_REM_DOUBLE_2ADDR:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeDoubleLo);
+ break;
+ case OP_ADD_INT_LIT16:
+ case OP_RSUB_INT:
+ case OP_MUL_INT_LIT16:
+ case OP_DIV_INT_LIT16:
+ case OP_REM_INT_LIT16:
+ case OP_AND_INT_LIT16:
+ case OP_OR_INT_LIT16:
+ case OP_XOR_INT_LIT16:
+ case OP_ADD_INT_LIT8:
+ case OP_RSUB_INT_LIT8:
+ case OP_MUL_INT_LIT8:
+ case OP_DIV_INT_LIT8:
+ case OP_REM_INT_LIT8:
+ case OP_SHL_INT_LIT8:
+ case OP_SHR_INT_LIT8:
+ case OP_USHR_INT_LIT8:
+ case OP_AND_INT_LIT8:
+ case OP_OR_INT_LIT8:
+ case OP_XOR_INT_LIT8:
+ setRegisterType(workRegs, decInsn.vA, kRegTypeInteger);
+ break;
+
+
+ /*
+ * See comments in analysis/CodeVerify.c re: why some of these are
+ * annoying to deal with. It's worse in this implementation, because
+ * we're not keeping any information about the classes held in each
+ * reference register.
+ *
+ * Handling most of these would require retaining the field/method
+ * reference info that we discarded when the instructions were
+ * quickened. This is feasible but not currently supported.
+ */
+ case OP_EXECUTE_INLINE:
+ case OP_INVOKE_DIRECT_EMPTY:
+ case OP_IGET_QUICK:
+ case OP_IGET_WIDE_QUICK:
+ case OP_IGET_OBJECT_QUICK:
+ case OP_IPUT_QUICK:
+ case OP_IPUT_WIDE_QUICK:
+ case OP_IPUT_OBJECT_QUICK:
+ case OP_INVOKE_VIRTUAL_QUICK:
+ case OP_INVOKE_VIRTUAL_QUICK_RANGE:
+ case OP_INVOKE_SUPER_QUICK:
+ case OP_INVOKE_SUPER_QUICK_RANGE:
+ dvmAbort(); // not implemented, shouldn't be here
+ break;
+
+
+ /* these should never appear */
+ case OP_UNUSED_3E:
+ case OP_UNUSED_3F:
+ case OP_UNUSED_40:
+ case OP_UNUSED_41:
+ case OP_UNUSED_42:
+ case OP_UNUSED_43:
+ case OP_UNUSED_73:
+ case OP_UNUSED_79:
+ case OP_UNUSED_7A:
+ case OP_UNUSED_E3:
+ case OP_UNUSED_E4:
+ case OP_UNUSED_E5:
+ case OP_UNUSED_E6:
+ case OP_UNUSED_E7:
+ case OP_UNUSED_E8:
+ case OP_UNUSED_E9:
+ case OP_UNUSED_EA:
+ case OP_UNUSED_EB:
+ case OP_UNUSED_EC:
+ case OP_UNUSED_ED:
+ case OP_UNUSED_EF:
+ case OP_UNUSED_F1:
+ case OP_UNUSED_FC:
+ case OP_UNUSED_FD:
+ case OP_UNUSED_FE:
+ case OP_UNUSED_FF:
+ dvmAbort();
+ break;
+
+ /*
+ * DO NOT add a "default" clause here. Without it the compiler will
+ * complain if an instruction is missing (which is desirable).
+ */
+ }
+
+
+ /*
+ * If we didn't just set the result register, clear it out. This
+ * isn't so important here, but does help ensure that our output matches
+ * the verifier.
+ */
+ if (!justSetResult) {
+ int reg = RESULT_REGISTER(pState->insnRegCountPlus);
+ workRegs[reg] = workRegs[reg+1] = kRegTypeUnknown;
+ }
+
+ /*
+ * Handle "continue". Tag the next consecutive instruction.
+ */
+ if ((nextFlags & kInstrCanContinue) != 0) {
+ int insnWidth = dvmInsnGetWidth(insnFlags, insnIdx);
+
+ /*
+ * We want to update the registers and set the "changed" flag on the
+ * next instruction (if necessary). We aren't storing register
+ * changes for all addresses, so for non-GC-point targets we just
+ * compare "entry" vs. "work" to see if we've changed anything.
+ */
+ if (getRegisterLine(pState, insnIdx+insnWidth) != NULL) {
+ updateRegisters(pState, insnIdx+insnWidth, workRegs);
+ } else {
+ /* if not yet visited, or regs were updated, set "changed" */
+ if (!dvmInsnIsVisited(insnFlags, insnIdx+insnWidth) ||
+ compareRegisters(workRegs, entryRegs,
+ pState->insnRegCountPlus) != 0)
+ {
+ dvmInsnSetChanged(insnFlags, insnIdx+insnWidth, true);
+ }
+ }
+ }
+
+ /*
+ * Handle "branch". Tag the branch target.
+ */
+ if ((nextFlags & kInstrCanBranch) != 0) {
+ bool isConditional;
+
+ dvmGetBranchTarget(meth, insnFlags, insnIdx, &branchTarget,
+ &isConditional);
+ assert(isConditional || (nextFlags & kInstrCanContinue) == 0);
+ assert(!isConditional || (nextFlags & kInstrCanContinue) != 0);
+
+ updateRegisters(pState, insnIdx+branchTarget, workRegs);
+ }
+
+ /*
+ * Handle "switch". Tag all possible branch targets.
+ */
+ if ((nextFlags & kInstrCanSwitch) != 0) {
+ int offsetToSwitch = insns[1] | (((s4)insns[2]) << 16);
+ const u2* switchInsns = insns + offsetToSwitch;
+ int switchCount = switchInsns[1];
+ int offsetToTargets, targ;
+
+ if ((*insns & 0xff) == OP_PACKED_SWITCH) {
+ /* 0=sig, 1=count, 2/3=firstKey */
+ offsetToTargets = 4;
+ } else {
+ /* 0=sig, 1=count, 2..count*2 = keys */
+ assert((*insns & 0xff) == OP_SPARSE_SWITCH);
+ offsetToTargets = 2 + 2*switchCount;
+ }
+
+ /* verify each switch target */
+ for (targ = 0; targ < switchCount; targ++) {
+ int offset, absOffset;
+
+ /* offsets are 32-bit, and only partly endian-swapped */
+ offset = switchInsns[offsetToTargets + targ*2] |
+ (((s4) switchInsns[offsetToTargets + targ*2 +1]) << 16);
+ absOffset = insnIdx + offset;
+ assert(absOffset >= 0 && absOffset < pState->insnsSize);
+
+ updateRegisters(pState, absOffset, workRegs);
+ }
+ }
+
+ /*
+ * Handle instructions that can throw and that are sitting in a
+ * "try" block. (If they're not in a "try" block when they throw,
+ * control transfers out of the method.)
+ */
+ if ((nextFlags & kInstrCanThrow) != 0 && dvmInsnIsInTry(insnFlags, insnIdx))
+ {
+ DexFile* pDexFile = meth->clazz->pDvmDex->pDexFile;
+ const DexCode* pCode = dvmGetMethodCode(meth);
+ DexCatchIterator iterator;
+
+ if (dexFindCatchHandler(&iterator, pCode, insnIdx)) {
+ while (true) {
+ DexCatchHandler* handler = dexCatchIteratorNext(&iterator);
+ if (handler == NULL)
+ break;
+
+ /* note we use entryRegs, not workRegs */
+ updateRegisters(pState, handler->address, entryRegs);
+ }
+ }
+ }
+
+ /*
+ * Update startGuess. Advance to the next instruction if that's
+ * possible, otherwise use the branch target if one was found. If
+ * neither of those exists we're in a return or throw; leave startGuess
+ * alone and let the caller sort it out.
+ */
+ if ((nextFlags & kInstrCanContinue) != 0) {
+ *pStartGuess = insnIdx + dvmInsnGetWidth(insnFlags, insnIdx);
+ } else if ((nextFlags & kInstrCanBranch) != 0) {
+ /* we're still okay if branchTarget is zero */
+ *pStartGuess = insnIdx + branchTarget;
+ }
+
+ assert(*pStartGuess >= 0 && *pStartGuess < pState->insnsSize &&
+ dvmInsnGetWidth(insnFlags, *pStartGuess) != 0);
+
+ result = true;
+
+bail:
+ return result;
+}
+
+
+/*
+ * Merge two SRegType values into their most specific common type.
+ *
+ * Sets "*pChanged" to "true" if the merged result differs from "type1";
+ * otherwise "*pChanged" is left untouched by this call.
+ */
+static SRegType mergeTypes(SRegType type1, SRegType type2, bool* pChanged)
+{
+    SRegType merged;
+
+    /* Identical inputs merge to themselves; skip the table entirely. */
+    if (type1 == type2)
+        return type1;
+
+    bool firstIsSimple = (type1 < kRegTypeMAX);
+    bool secondIsSimple = (type2 < kRegTypeMAX);
+
+    if (firstIsSimple && secondIsSimple) {
+        /*
+         * Both are primitive categories, so the precomputed merge table
+         * has the answer.  The uninitialized table entry at index zero
+         * shows up as a plain kRegTypeUninit value; since that can only
+         * merge with itself, the table rules do the right thing.
+         */
+        merged = gDvmMergeTab[type1][type2];
+    } else if (firstIsSimple) {
+        /* simple + reference: only a zero (null) constant may widen */
+        merged = (type1 == kRegTypeZero) ? type2 : kRegTypeConflict;
+    } else if (secondIsSimple) {
+        /* reference + simple: mirror image of the case above */
+        merged = (type2 == kRegTypeZero) ? type1 : kRegTypeConflict;
+    } else {
+        /* two references; this implementation only handles exact matches */
+        assert(type1 == type2);
+        merged = type1;
+    }
+
+    if (merged != type1)
+        *pChanged = true;
+    return merged;
+}
+
+/*
+ * Control can transfer to "nextInsn".
+ *
+ * Merge the registers from "workRegs" into the register line stored for
+ * "nextInsn", and set the "changed" flag on the target address if the
+ * registers were altered (or copied in for the first time).
+ *
+ * (Fix: removed the unused local "meth"; it was initialized from
+ * pState->method but never referenced, triggering -Wunused-variable.)
+ */
+static void updateRegisters(WorkState* pState, int nextInsn,
+    const SRegType* workRegs)
+{
+    InsnFlags* insnFlags = pState->insnFlags;
+    const int insnRegCountPlus = pState->insnRegCountPlus;
+    SRegType* targetRegs = getRegisterLine(pState, nextInsn);
+
+    if (!dvmInsnIsVisitedOrChanged(insnFlags, nextInsn)) {
+        /*
+         * We haven't processed this instruction before, and we haven't
+         * touched the registers here, so there's nothing to "merge".  Copy
+         * the registers over and mark it as changed.  (This is the only
+         * way a register can transition out of "unknown", so this is not
+         * just an optimization.)
+         */
+        LOGVV("COPY into 0x%04x\n", nextInsn);
+        copyRegisters(targetRegs, workRegs, insnRegCountPlus);
+        dvmInsnSetChanged(insnFlags, nextInsn, true);
+    } else {
+        /* merge registers, set Changed only if different */
+        LOGVV("MERGE into 0x%04x\n", nextInsn);
+        bool changed = false;
+        int i;
+
+        for (i = 0; i < insnRegCountPlus; i++) {
+            targetRegs[i] = mergeTypes(targetRegs[i], workRegs[i], &changed);
+        }
+
+        if (changed)
+            dvmInsnSetChanged(insnFlags, nextInsn, true);
+    }
+}
+
+#endif /*#if 0*/
+
diff --git a/vm/analysis/RegisterMap.h b/vm/analysis/RegisterMap.h
new file mode 100644
index 0000000..2a890e7
--- /dev/null
+++ b/vm/analysis/RegisterMap.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// ** UNDER CONSTRUCTION **
+
+/*
+ * Declaration of register map data structure and related functions.
+ */
+#ifndef _DALVIK_REGISTERMAP
+#define _DALVIK_REGISTERMAP
+
+/*
+ * Format enumeration for RegisterMap data area.
+ */
+typedef enum RegisterMapFormat {
+ kFormatUnknown = 0, /* zero, so zero-filled memory reads as "unknown" */
+ kFormatCompact8, /* compact layout, 8-bit addresses */
+ kFormatCompact16, /* compact layout, 16-bit addresses */
+ // TODO: compressed stream
+} RegisterMapFormat;
+
+/*
+ * This is a single variable-size structure. It may be allocated on the
+ * heap or mapped out of a (post-dexopt) DEX file.
+ */
+struct RegisterMap {
+ /* header */
+ u1 format; /* enum RegisterMapFormat */
+ u1 regWidth; /* bytes per register line, 1+ */
+ u2 numEntries; /* number of entries */
+
+ /* data starts here; no alignment guarantees made */
+ u1 data[1]; /* variable-size tail; struct is over-allocated to hold it */
+};
+
+/*
+ * Generate the register map for a previously-verified method.
+ *
+ * Returns a pointer to a newly-allocated RegisterMap.
+ */
+//RegisterMap* dvmGenerateRegisterMap(const Method* meth);
+
+/*
+ * Various bits of data generated by the verifier, wrapped up in a package
+ * for ease of use by the register map generator.
+ */
+typedef struct VerifierData {
+ /*
+ * The method we're working on.
+ */
+ const Method* method;
+
+ /*
+ * Number of instructions in the method.
+ */
+ int insnsSize;
+
+ /*
+ * Number of registers we track for each instruction. This is equal
+ * to the method's declared "registersSize". (Does not include the
+ * pending return value.)
+ */
+ int insnRegCount;
+
+ /*
+ * Instruction widths and flags, one entry per code unit.
+ */
+ InsnFlags* insnFlags;
+
+ /*
+ * Array of RegType arrays, one entry per code unit. We only need
+ * entries for code units that hold the start of an "interesting"
+ * instruction. For register map generation, we're only interested
+ * in GC points. (NOTE(review): presumably entries for other
+ * addresses are left NULL -- confirm against the generator.)
+ */
+ RegType** addrRegs;
+} VerifierData;
+
+/*
+ * Generate the register map for a method that has just been verified
+ * (i.e. we're doing this as part of verification).
+ *
+ * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
+ */
+RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata);
+
+#endif /*_DALVIK_REGISTERMAP*/
diff --git a/vm/analysis/VerifySubs.c b/vm/analysis/VerifySubs.c
new file mode 100644
index 0000000..8dcc6f8
--- /dev/null
+++ b/vm/analysis/VerifySubs.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik verification subroutines.
+ */
+#include "Dalvik.h"
+#include "analysis/CodeVerify.h"
+#include "libdex/DexCatch.h"
+#include "libdex/InstrUtils.h"
+
+
+/*
+ * Compute the width of the instruction at each address in the instruction
+ * stream. Addresses that are in the middle of an instruction, or that
+ * are part of switch table data, are not set (so the caller should probably
+ * initialize "insnFlags" to zero).
+ *
+ * If "pNewInstanceCount" is not NULL, it will be set to the number of
+ * new-instance instructions in the method.
+ *
+ * Logs an error and returns "false" on failure.
+ */
+bool dvmComputeCodeWidths(const Method* meth, InsnFlags* insnFlags,
+ int* pNewInstanceCount)
+{
+ const int insnCount = dvmGetMethodInsnsSize(meth);
+ const u2* insns = meth->insns;
+ bool result = false;
+ int newInstanceCount = 0;
+ int i;
+
+
+ for (i = 0; i < insnCount; /**/) {
+ int width;
+
+ /*
+ * Switch tables and array data tables are identified with
+ * "extended NOP" opcodes. They contain no executable code,
+ * so we can just skip past them.
+ */
+ if (*insns == kPackedSwitchSignature) {
+ /* payload: sig, size, 2-unit first key, then 2 units per target */
+ width = 4 + insns[1] * 2;
+ } else if (*insns == kSparseSwitchSignature) {
+ /* payload: sig, size, then 2-unit key + 2-unit target per entry */
+ width = 2 + insns[1] * 4;
+ } else if (*insns == kArrayDataSignature) {
+ u4 size = insns[2] | (((u4)insns[3]) << 16);
+ /*
+ * 4 header units plus the element data, rounded up to a whole
+ * 16-bit code unit. NOTE(review): insns[1] (element width) *
+ * size is u4 arithmetic and can wrap for a hostile table; the
+ * "insane width" check below only catches large non-negative
+ * values -- confirm malformed tables are rejected elsewhere.
+ */
+ width = 4 + (insns[1] * size + 1) / 2;
+ } else {
+ int instr = *insns & 0xff;
+ width = dexGetInstrWidthAbs(gDvm.instrWidth, instr);
+ if (width == 0) {
+ LOG_VFY_METH(meth,
+ "VFY: invalid post-opt instruction (0x%x)\n", instr);
+ goto bail;
+ }
+ if (width < 0 || width > 5) {
+ /* a corrupt width table is a VM bug, not bad input */
+ LOGE("VFY: bizarre width value %d\n", width);
+ dvmAbort();
+ }
+
+ /* tally new-instance instructions for the caller */
+ if (instr == OP_NEW_INSTANCE)
+ newInstanceCount++;
+ }
+
+ /* width must fit in the low 16 bits of InsnFlags */
+ if (width > 65535) {
+ LOG_VFY_METH(meth, "VFY: insane width %d\n", width);
+ goto bail;
+ }
+
+ insnFlags[i] |= width;
+ i += width;
+ insns += width;
+ }
+ /* the last instruction must end exactly at the method's boundary */
+ if (i != (int) dvmGetMethodInsnsSize(meth)) {
+ LOG_VFY_METH(meth, "VFY: code did not end where expected (%d vs. %d)\n",
+ i, dvmGetMethodInsnsSize(meth));
+ goto bail;
+ }
+
+ result = true;
+ if (pNewInstanceCount != NULL)
+ *pNewInstanceCount = newInstanceCount;
+
+bail:
+ return result;
+}
+
+/*
+ * Set the "in try" flags for all instructions protected by "try" statements.
+ * Also sets the "branch target" flags for exception handlers.
+ *
+ * Call this after widths have been set in "insnFlags".
+ *
+ * Returns "false" if something in the exception table looks fishy, but
+ * we're expecting the exception table to be somewhat sane.
+ *
+ * (Fix: removed the unused local "pDexFile"; it was initialized but never
+ * referenced, triggering -Wunused-variable.)
+ */
+bool dvmSetTryFlags(const Method* meth, InsnFlags* insnFlags)
+{
+    u4 insnsSize = dvmGetMethodInsnsSize(meth);
+    const DexCode* pCode = dvmGetMethodCode(meth);
+    u4 triesSize = pCode->triesSize;
+    const DexTry* pTries;
+    u4 handlersSize;
+    u4 offset;
+    u4 i;
+
+    /* no "try" blocks means nothing to flag */
+    if (triesSize == 0) {
+        return true;
+    }
+
+    pTries = dexGetTries(pCode);
+    handlersSize = dexGetHandlersSize(pCode);
+
+    for (i = 0; i < triesSize; i++) {
+        const DexTry* pTry = &pTries[i];
+        u4 start = pTry->startAddr;
+        u4 end = start + pTry->insnCount;
+        u4 addr;
+
+        /* the try range must be non-empty and inside the method */
+        if ((start >= end) || (start >= insnsSize) || (end > insnsSize)) {
+            LOG_VFY_METH(meth,
+                "VFY: bad exception entry: startAddr=%d endAddr=%d (size=%d)\n",
+                start, end, insnsSize);
+            return false;
+        }
+
+        /* width==0 means "start" falls mid-instruction */
+        if (dvmInsnGetWidth(insnFlags, start) == 0) {
+            LOG_VFY_METH(meth,
+                "VFY: 'try' block starts inside an instruction (%d)\n",
+                start);
+            return false;
+        }
+
+        /* flag every instruction that begins within [start, end) */
+        for (addr = start; addr < end;
+            addr += dvmInsnGetWidth(insnFlags, addr))
+        {
+            assert(dvmInsnGetWidth(insnFlags, addr) != 0);
+            dvmInsnSetInTry(insnFlags, addr, true);
+        }
+    }
+
+    /* Iterate over each of the handlers to verify target addresses. */
+    offset = dexGetFirstHandlerOffset(pCode);
+    for (i = 0; i < handlersSize; i++) {
+        DexCatchIterator iterator;
+        dexCatchIteratorInit(&iterator, pCode, offset);
+
+        for (;;) {
+            DexCatchHandler* handler = dexCatchIteratorNext(&iterator);
+            u4 addr;
+
+            if (handler == NULL) {
+                break;
+            }
+
+            /* a handler must land on an instruction boundary */
+            addr = handler->address;
+            if (dvmInsnGetWidth(insnFlags, addr) == 0) {
+                LOG_VFY_METH(meth,
+                    "VFY: exception handler starts at bad address (%d)\n",
+                    addr);
+                return false;
+            }
+
+            dvmInsnSetBranchTarget(insnFlags, addr, true);
+        }
+
+        offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
+    }
+
+    return true;
+}
+
+/*
+ * Verify a switch table. "curOffset" is the offset of the switch
+ * instruction.
+ *
+ * Checks that the table payload is in range and 32-bit aligned, carries
+ * the expected signature, and (for sparse switches) has strictly ascending
+ * keys. Each branch target is validated and flagged in "insnFlags".
+ *
+ * (Fixes: the sparse-switch key-order failure was logged as "invalid
+ * packed switch"; 32-bit values are now assembled in unsigned arithmetic
+ * because left-shifting a promoted int into the sign bit is undefined
+ * behavior, C 6.5.7.)
+ *
+ * Returns "false" (after logging) on failure.
+ */
+bool dvmCheckSwitchTargets(const Method* meth, InsnFlags* insnFlags,
+    int curOffset)
+{
+    const int insnCount = dvmGetMethodInsnsSize(meth);
+    const u2* insns = meth->insns + curOffset;
+    const u2* switchInsns;
+    u2 expectedSignature;
+    int switchCount, tableSize;
+    int offsetToSwitch, offsetToKeys, offsetToTargets, targ;
+    int offset, absOffset;
+
+    assert(curOffset >= 0 && curOffset < insnCount);
+
+    /* make sure the start of the switch is in range */
+    offsetToSwitch = (s2) insns[1];
+    if (curOffset + offsetToSwitch < 0 ||
+        curOffset + offsetToSwitch + 2 >= insnCount)
+    {
+        LOG_VFY_METH(meth,
+            "VFY: invalid switch start: at %d, switch offset %d, count %d\n",
+            curOffset, offsetToSwitch, insnCount);
+        return false;
+    }
+
+    /* offset to switch table is a relative branch-style offset */
+    switchInsns = insns + offsetToSwitch;
+
+    /* make sure the table is 32-bit aligned */
+    if ((((u4) switchInsns) & 0x03) != 0) {
+        LOG_VFY_METH(meth,
+            "VFY: unaligned switch table: at %d, switch offset %d\n",
+            curOffset, offsetToSwitch);
+        return false;
+    }
+
+    switchCount = switchInsns[1];
+
+    if ((*insns & 0xff) == OP_PACKED_SWITCH) {
+        /* 0=sig, 1=count, 2/3=firstKey */
+        offsetToTargets = 4;
+        offsetToKeys = -1;
+        expectedSignature = kPackedSwitchSignature;
+    } else {
+        /* 0=sig, 1=count, 2..count*2 = keys */
+        offsetToKeys = 2;
+        offsetToTargets = 2 + 2*switchCount;
+        expectedSignature = kSparseSwitchSignature;
+    }
+    tableSize = offsetToTargets + switchCount*2;
+
+    if (switchInsns[0] != expectedSignature) {
+        LOG_VFY_METH(meth,
+            "VFY: wrong signature for switch table (0x%04x, wanted 0x%04x)\n",
+            switchInsns[0], expectedSignature);
+        return false;
+    }
+
+    /* make sure the end of the switch is in range */
+    if (curOffset + offsetToSwitch + tableSize > insnCount) {
+        LOG_VFY_METH(meth,
+            "VFY: invalid switch end: at %d, switch offset %d, end %d, count %d\n",
+            curOffset, offsetToSwitch, curOffset + offsetToSwitch + tableSize,
+            insnCount);
+        return false;
+    }
+
+    /* for a sparse switch, verify the keys are in ascending order */
+    if (offsetToKeys > 0 && switchCount > 1) {
+        s4 lastKey;
+
+        /* assemble 32-bit keys from code-unit pairs in unsigned math */
+        lastKey = (s4) (switchInsns[offsetToKeys] |
+            (((u4) switchInsns[offsetToKeys+1]) << 16));
+        for (targ = 1; targ < switchCount; targ++) {
+            s4 key = (s4) (switchInsns[offsetToKeys + targ*2] |
+                (((u4) switchInsns[offsetToKeys + targ*2 +1]) << 16));
+            if (key <= lastKey) {
+                LOG_VFY_METH(meth,
+                    "VFY: invalid sparse switch: last key=%d, this=%d\n",
+                    lastKey, key);
+                return false;
+            }
+
+            lastKey = key;
+        }
+    }
+
+    /* verify each switch target */
+    for (targ = 0; targ < switchCount; targ++) {
+        offset = (s4) (switchInsns[offsetToTargets + targ*2] |
+            (((u4) switchInsns[offsetToTargets + targ*2 +1]) << 16));
+        absOffset = curOffset + offset;
+
+        if (absOffset < 0 || absOffset >= insnCount ||
+            !dvmInsnIsOpcode(insnFlags, absOffset))
+        {
+            LOG_VFY_METH(meth,
+                "VFY: invalid switch target %d (-> 0x%x) at 0x%x[%d]\n",
+                offset, absOffset, curOffset, targ);
+            return false;
+        }
+        dvmInsnSetBranchTarget(insnFlags, absOffset, true);
+    }
+
+    return true;
+}
+
+/*
+ * Verify that the target of a branch instruction is valid.
+ *
+ * We don't expect code to jump directly into an exception handler, but
+ * it's valid to do so as long as the target isn't a "move-exception"
+ * instruction. We verify that in a later stage.
+ *
+ * The VM spec doesn't forbid an instruction from branching to itself,
+ * but the Dalvik spec declares that only certain instructions can do so;
+ * pass "selfOkay" accordingly.
+ *
+ * (Fix: removed the unused local "insns"; it was initialized but never
+ * referenced, triggering -Wunused-variable.)
+ */
+bool dvmCheckBranchTarget(const Method* meth, InsnFlags* insnFlags,
+    int curOffset, bool selfOkay)
+{
+    const int insnCount = dvmGetMethodInsnsSize(meth);
+    int offset, absOffset;
+    bool isConditional;
+
+    if (!dvmGetBranchTarget(meth, insnFlags, curOffset, &offset,
+            &isConditional))
+        return false;
+
+    if (!selfOkay && offset == 0) {
+        LOG_VFY_METH(meth, "VFY: branch offset of zero not allowed at 0x%x\n",
+            curOffset);
+        return false;
+    }
+
+    /*
+     * Check for 32-bit overflow. This isn't strictly necessary if we can
+     * depend on the VM to have identical "wrap-around" behavior, but
+     * it's unwise to depend on that.
+     */
+    if (((s8) curOffset + (s8) offset) != (s8)(curOffset + offset)) {
+        LOG_VFY_METH(meth, "VFY: branch target overflow 0x%x +%d\n",
+            curOffset, offset);
+        return false;
+    }
+    absOffset = curOffset + offset;
+    if (absOffset < 0 || absOffset >= insnCount ||
+        !dvmInsnIsOpcode(insnFlags, absOffset))
+    {
+        LOG_VFY_METH(meth,
+            "VFY: invalid branch target %d (-> 0x%x) at 0x%x\n",
+            offset, absOffset, curOffset);
+        return false;
+    }
+    dvmInsnSetBranchTarget(insnFlags, absOffset, true);
+
+    return true;
+}
+
+
+/*
+ * Output a code verifier warning message. For the pre-verifier it's not
+ * a big deal if something fails (and it may even be expected), but if
+ * we're doing just-in-time verification it's significant.
+ *
+ * "format" is a printf-style format string; "meth" may be NULL, in which
+ * case only the formatted message is logged (no "rejected" line).
+ *
+ * (Fix: added the missing va_end(); C99 7.15.1 requires every va_start
+ * to be paired with a va_end before the function returns.)
+ */
+void dvmLogVerifyFailure(const Method* meth, const char* format, ...)
+{
+    va_list ap;
+    int logLevel;
+
+    if (gDvm.optimizing) {
+        /* failures during dexopt are common and not worth logging */
+        return;
+        //logLevel = ANDROID_LOG_DEBUG;
+    } else {
+        logLevel = ANDROID_LOG_WARN;
+    }
+
+    va_start(ap, format);
+    LOG_PRI_VA(logLevel, LOG_TAG, format, ap);
+    va_end(ap);
+    if (meth != NULL) {
+        /* descriptor string is heap-allocated; free after logging */
+        char* desc = dexProtoCopyMethodDescriptor(&meth->prototype);
+        LOG_PRI(logLevel, LOG_TAG, "VFY: rejected %s.%s %s\n",
+            meth->clazz->descriptor, meth->name, desc);
+        free(desc);
+    }
+}
+
+/*
+ * Show a relatively human-readable message describing the failure to
+ * resolve a class.
+ *
+ * TODO: this is somewhat misleading when resolution fails because of
+ * illegal access rather than nonexistent class.
+ */
+void dvmLogUnableToResolveClass(const char* missingClassDescr,
+    const Method* meth)
+{
+    /* during dexopt, resolution failures are expected; stay quiet */
+    if (gDvm.optimizing)
+        return;
+
+    char* missingDotName = dvmDescriptorToDot(missingClassDescr);
+    char* referrerDotName = dvmDescriptorToDot(meth->clazz->descriptor);
+
+    LOGE("Could not find class '%s', referenced from method %s.%s\n",
+        missingDotName, referrerDotName, meth->name);
+
+    /* both dot-form names are heap-allocated copies */
+    free(referrerDotName);
+    free(missingDotName);
+}
+
+/*
+ * Extract the relative offset from a branch instruction, storing it in
+ * "*pOffset". "*pConditional" indicates whether the branch is conditional
+ * (i.e. execution may also fall through).
+ *
+ * Returns "false" on failure (e.g. this isn't a branch instruction).
+ *
+ * (Fix: removed the unused local "int tmp;" and the unreachable "break"
+ * after the default case's return.)
+ */
+bool dvmGetBranchTarget(const Method* meth, InsnFlags* insnFlags,
+    int curOffset, int* pOffset, bool* pConditional)
+{
+    const u2* insns = meth->insns + curOffset;
+
+    switch (*insns & 0xff) {
+    case OP_GOTO:
+        /* signed 8-bit offset lives in the high byte of the first unit */
+        *pOffset = ((s2) *insns) >> 8;
+        *pConditional = false;
+        break;
+    case OP_GOTO_32:
+        /* 32-bit offset spans the next two code units */
+        *pOffset = insns[1] | (((u4) insns[2]) << 16);
+        *pConditional = false;
+        break;
+    case OP_GOTO_16:
+        *pOffset = (s2) insns[1];
+        *pConditional = false;
+        break;
+    case OP_IF_EQ:
+    case OP_IF_NE:
+    case OP_IF_LT:
+    case OP_IF_GE:
+    case OP_IF_GT:
+    case OP_IF_LE:
+    case OP_IF_EQZ:
+    case OP_IF_NEZ:
+    case OP_IF_LTZ:
+    case OP_IF_GEZ:
+    case OP_IF_GTZ:
+    case OP_IF_LEZ:
+        /* all "if" variants: signed 16-bit offset, may fall through */
+        *pOffset = (s2) insns[1];
+        *pConditional = true;
+        break;
+    default:
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Given a 32-bit constant, return the most-restricted RegType enum entry
+ * that can hold the value. The mapping is:
+ *   value < -32768        -> Integer
+ *   -32768 .. -129        -> Short
+ *   -128 .. -1            -> Byte
+ *   0                     -> Zero
+ *   1                     -> One
+ *   2 .. 127              -> PosByte
+ *   128 .. 32767          -> PosShort
+ *   32768 .. 65535        -> Char
+ *   anything larger       -> Integer
+ */
+char dvmDetermineCat1Const(s4 value)
+{
+    /* the two exact small constants come first */
+    if (value == 0)
+        return kRegTypeZero;
+    if (value == 1)
+        return kRegTypeOne;
+
+    if (value < 0) {
+        /* negative: try byte, then short, then fall back to int */
+        if (value >= -128)
+            return kRegTypeByte;
+        if (value >= -32768)
+            return kRegTypeShort;
+        return kRegTypeInteger;
+    }
+
+    /* positive (>= 2): widen through the unsigned-friendly categories */
+    if (value < 128)
+        return kRegTypePosByte;
+    if (value < 32768)
+        return kRegTypePosShort;
+    if (value < 65536)
+        return kRegTypeChar;
+    return kRegTypeInteger;
+}
+
diff --git a/vm/analysis/VerifySubs.h b/vm/analysis/VerifySubs.h
new file mode 100644
index 0000000..4d5b57c
--- /dev/null
+++ b/vm/analysis/VerifySubs.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Dalvik bytecode verification subroutines.
+ */
+#ifndef _DALVIK_VERIFYSUBS
+#define _DALVIK_VERIFYSUBS
+
+/*
+ * InsnFlags is a 32-bit integer with the following layout, matching the
+ * kInsnFlag* constants defined below:
+ *   0-15  instruction length (or 0 if this address doesn't hold an opcode)
+ *  16     try block (exceptions thrown here may be caught locally)
+ *  17     branch target (another instruction can branch to this address)
+ *  18     GC point
+ *  30     visited (verifier has examined this instruction at least once)
+ *  31     changed (set/cleared as bytecode verifier runs)
+ */
+typedef u4 InsnFlags;
+
+#define kInsnFlagWidthMask 0x0000ffff
+#define kInsnFlagInTry (1 << 16)
+#define kInsnFlagBranchTarget (1 << 17)
+#define kInsnFlagGcPoint (1 << 18)
+#define kInsnFlagVisited (1 << 30)
+/* NOTE(review): "1 << 31" shifts into the sign bit of int; "(u4) 1 << 31"
+ * would avoid the implementation-defined/undefined behavior -- confirm
+ * before changing, since the value is the same on this platform */
+#define kInsnFlagChanged (1 << 31)
+
+/* add opcode widths to InsnFlags */
+bool dvmComputeCodeWidths(const Method* meth, InsnFlags* insnFlags,
+    int* pNewInstanceCount);
+
+/* set the "in try" flag for sections of code wrapped with a "try" block */
+bool dvmSetTryFlags(const Method* meth, InsnFlags* insnFlags);
+
+/* check switch targets and set the "branch target" flag for destinations */
+bool dvmCheckSwitchTargets(const Method* meth, InsnFlags* insnFlags,
+    int curOffset);
+
+/* verify branch target and set "branch target" flag on the destination */
+bool dvmCheckBranchTarget(const Method* meth, InsnFlags* insnFlags,
+    int curOffset, bool selfOkay);
+
+/* verification failure reporting (LOG_VFY_METH includes method info) */
+#define LOG_VFY(...) dvmLogVerifyFailure(NULL, __VA_ARGS__)
+#define LOG_VFY_METH(_meth, ...) dvmLogVerifyFailure(_meth, __VA_ARGS__)
+
+/* log verification failure with optional method info */
+void dvmLogVerifyFailure(const Method* meth, const char* format, ...);
+
+/* log verification failure due to resolution trouble */
+void dvmLogUnableToResolveClass(const char* missingClassDescr,
+    const Method* meth);
+
+/* extract the relative branch target from a branch instruction */
+bool dvmGetBranchTarget(const Method* meth, InsnFlags* insnFlags,
+    int curOffset, int* pOffset, bool* pConditional);
+
+/* return a RegType enumeration value that "value" just fits into */
+char dvmDetermineCat1Const(s4 value);
+
+#endif /*_DALVIK_VERIFYSUBS*/