auto import //branches/master/...@140412
diff --git a/vm/Android.mk b/vm/Android.mk
index 25a03fd..1c3a052 100644
--- a/vm/Android.mk
+++ b/vm/Android.mk
@@ -52,9 +52,11 @@
 # - assert()  (NDEBUG is handled in the build system)
 #
 ifeq ($(TARGET_BUILD_TYPE),debug)
-LOCAL_CFLAGS += -DWITH_INSTR_CHECKS -DWITH_EXTRA_OBJECT_VALIDATION
+LOCAL_CFLAGS += -DWITH_INSTR_CHECKS
+LOCAL_CFLAGS += -DWITH_EXTRA_OBJECT_VALIDATION
 LOCAL_CFLAGS += -DWITH_TRACKREF_CHECKS
 LOCAL_CFLAGS += -DWITH_ALLOC_LIMITS
+LOCAL_CFLAGS += -DWITH_EXTRA_GC_CHECKS=1
 #LOCAL_CFLAGS += -DCHECK_MUTEX
 #LOCAL_CFLAGS += -DPROFILE_FIELD_ACCESS
 LOCAL_CFLAGS += -DDVM_SHOW_EXCEPTION=3
@@ -73,6 +75,8 @@
 #LOCAL_CFLAGS += -DNDEBUG -DLOG_NDEBUG=1
 # "-O2" is redundant for device (release) but useful for sim (debug)
 #LOCAL_CFLAGS += -O2 -Winline
+#LOCAL_CFLAGS += -DWITH_EXTRA_OBJECT_VALIDATION
+LOCAL_CFLAGS += -DWITH_EXTRA_GC_CHECKS=1
 LOCAL_CFLAGS += -DDVM_SHOW_EXCEPTION=1
 # if you want to try with assertions on the device, add:
 #LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT
@@ -173,6 +177,7 @@
 	reflect/Annotation.c \
 	reflect/Proxy.c \
 	reflect/Reflect.c \
+	test/AtomicSpeed.c \
 	test/TestHash.c
 
 WITH_HPROF := $(strip $(WITH_HPROF))
diff --git a/vm/Dalvik.h b/vm/Dalvik.h
index 2c7bd7c..29abc2c 100644
--- a/vm/Dalvik.h
+++ b/vm/Dalvik.h
@@ -67,6 +67,7 @@
 #include "LinearAlloc.h"
 #include "analysis/DexVerify.h"
 #include "analysis/DexOptimize.h"
+#include "analysis/RegisterMap.h"
 #include "Init.h"
 #include "libdex/OpCode.h"
 #include "libdex/InstrUtils.h"
diff --git a/vm/Debugger.c b/vm/Debugger.c
index c667893..3affa97 100644
--- a/vm/Debugger.c
+++ b/vm/Debugger.c
@@ -2697,10 +2697,10 @@
     dvmUnlockThreadList();
 
     /*
-     * We change our thread status (which should be THREAD_RUNNING) so the
-     * VM can suspend for a GC if the invoke request causes us to run out
-     * of memory.  It's also a good idea to change it before locking the
-     * invokeReq mutex, although that should never be held for long.
+     * We change our (JDWP thread) status, which should be THREAD_RUNNING,
+     * so the VM can suspend for a GC if the invoke request causes us to
+     * run out of memory.  It's also a good idea to change it before locking
+     * the invokeReq mutex, although that should never be held for long.
      */
     Thread* self = dvmThreadSelf();
     int oldStatus = dvmChangeStatus(self, THREAD_VMWAIT);
@@ -2774,18 +2774,25 @@
 
 /*
  * Execute the method described by "*pReq".
+ *
+ * We're currently in VMWAIT, because we're stopped on a breakpoint.  We
+ * want to switch to RUNNING while we execute.
  */
 void dvmDbgExecuteMethod(DebugInvokeReq* pReq)
 {
     Thread* self = dvmThreadSelf();
     const Method* meth;
     Object* oldExcept;
+    int oldStatus;
 
     /*
      * We can be called while an exception is pending in the VM.  We need
      * to preserve that across the method invocation.
      */
     oldExcept = dvmGetException(self);
+    dvmClearException(self);
+
+    oldStatus = dvmChangeStatus(self, THREAD_RUNNING);
 
     /*
      * Translate the method through the vtable, unless we're calling a
@@ -2832,6 +2839,7 @@
 
     if (oldExcept != NULL)
         dvmSetException(self, oldExcept);
+    dvmChangeStatus(self, oldStatus);
 }
 
 // for dvmAddressSetForLine
diff --git a/vm/Globals.h b/vm/Globals.h
index f68e45d..0678ea2 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -99,6 +99,7 @@
 
     DexOptimizerMode    dexOptMode;
     DexClassVerifyMode  classVerifyMode;
+    bool        preciseGc;
     bool        generateRegisterMaps;
 
     int         assertionCtrlCount;
@@ -462,10 +463,13 @@
     /*
      * Compute some stats on loaded classes.
      */
-    int             numLoadedClasses;
-    int             numDeclaredMethods;
-    int             numDeclaredInstFields;
-    int             numDeclaredStaticFields;
+    int         numLoadedClasses;
+    int         numDeclaredMethods;
+    int         numDeclaredInstFields;
+    int         numDeclaredStaticFields;
+
+    /* when using a native debugger, set this to suppress watchdog timers */
+    bool        nativeDebuggerActive;
 
     /*
      * JDWP debugger support.
@@ -594,6 +598,9 @@
 #ifdef COUNT_PRECISE_METHODS
     PointerSet* preciseMethods;
 #endif
+
+    /* some RegisterMap statistics, useful during development */
+    void*       registerMapStats;
 };
 
 extern struct DvmGlobals gDvm;
diff --git a/vm/Init.c b/vm/Init.c
index 176910c..4ba10b7 100644
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -95,6 +95,7 @@
     dvmFprintf(stderr, "  -Xjniopts:{warnonly,forcecopy}\n");
     dvmFprintf(stderr, "  -Xdeadlockpredict:{off,warn,err,abort}\n");
     dvmFprintf(stderr, "  -Xstacktracefile:<filename>\n");
+    dvmFprintf(stderr, "  -Xgc:[no]precise\n");
     dvmFprintf(stderr, "  -Xgenregmap\n");
     dvmFprintf(stderr, "  -Xcheckdexsum\n");
     dvmFprintf(stderr, "\n");
@@ -132,6 +133,9 @@
 #ifdef WITH_EXTRA_OBJECT_VALIDATION
         " extra_object_validation"
 #endif
+#ifdef WITH_EXTRA_GC_CHECKS
+        " extra_gc_checks"
+#endif
 #ifdef WITH_DALVIK_ASSERT
         " dalvik_assert"
 #endif
@@ -788,6 +792,18 @@
 
         } else if (strcmp(argv[i], "-Xgenregmap") == 0) {
             gDvm.generateRegisterMaps = true;
+            LOGD("Register maps will be generated during verification\n");
+
+        } else if (strncmp(argv[i], "-Xgc:", 5) == 0) {
+            if (strcmp(argv[i] + 5, "precise") == 0)
+                gDvm.preciseGc = true;
+            else if (strcmp(argv[i] + 5, "noprecise") == 0)
+                gDvm.preciseGc = false;
+            else {
+                dvmFprintf(stderr, "Bad value for -Xgc");
+                return -1;
+            }
+            LOGD("Precise GC configured %s\n", gDvm.preciseGc ? "ON" : "OFF");
 
         } else if (strcmp(argv[i], "-Xcheckdexsum") == 0) {
             gDvm.verifyDexChecksum = true;
@@ -810,6 +826,8 @@
 
 /*
  * Set defaults for fields altered or modified by arguments.
+ *
+ * Globals are initialized to 0 (a/k/a NULL or false).
  */
 static void setCommandLineDefaults()
 {
@@ -936,6 +954,14 @@
         goto fail;
     }
 
+#if WITH_EXTRA_GC_CHECKS > 1
+    /* only "portable" interp has the extra goodies */
+    if (gDvm.executionMode != kExecutionModeInterpPortable) {
+        LOGI("Switching to 'portable' interpreter for GC checks\n");
+        gDvm.executionMode = kExecutionModeInterpPortable;
+    }
+#endif
+
     /* configure signal handling */
     if (!gDvm.reduceSignals)
         blockSignals();
@@ -957,6 +983,8 @@
         goto fail;
     if (!dvmVerificationStartup())
         goto fail;
+    if (!dvmRegisterMapStartup())
+        goto fail;
     if (!dvmInstanceofStartup())
         goto fail;
     if (!dvmClassStartup())
@@ -1290,6 +1318,8 @@
         goto fail;
     if (!dvmVerificationStartup())
         goto fail;
+    if (!dvmRegisterMapStartup())
+        goto fail;
     if (!dvmInstanceofStartup())
         goto fail;
     if (!dvmClassStartup())
@@ -1370,6 +1400,7 @@
     dvmThreadShutdown();
     dvmClassShutdown();
     dvmVerificationShutdown();
+    dvmRegisterMapShutdown();
     dvmInstanceofShutdown();
     dvmInlineNativeShutdown();
     dvmGcShutdown();
diff --git a/vm/Init.h b/vm/Init.h
index 8549338..63051a2 100644
--- a/vm/Init.h
+++ b/vm/Init.h
@@ -41,9 +41,14 @@
     DexClassVerifyMode verifyMode, int dexoptFlags);
 
 /*
- * Unconditionally abort the entire VM.  Try not to use this.
+ * Replacement for fprintf() when we want to send a message to the console.
+ * This defaults to fprintf(), but will use the JNI fprintf callback if
+ * one was provided.
  */
-int dvmFprintf(FILE* fp, const char* format, ...);
-void dvmAbort(void);
+int dvmFprintf(FILE* fp, const char* format, ...)
+#if defined(__GNUC__)
+    __attribute__ ((format(printf, 2, 3)))
+#endif
+    ;
 
 #endif /*_DALVIK_INIT*/
diff --git a/vm/Jni.c b/vm/Jni.c
index f7a21ff..cb1821f 100644
--- a/vm/Jni.c
+++ b/vm/Jni.c
@@ -2653,7 +2653,7 @@
 {
     JNI_ENTER();
     jobjectRefType type;
-    
+
     if (obj == NULL)
         type = JNIInvalidRefType;
     else
@@ -2677,45 +2677,55 @@
 static jobject NewDirectByteBuffer(JNIEnv * env, void* address, jlong capacity)
 {
     jmethodID newBufferMethod;
-    jclass directBufferClass;
-    jclass platformaddressClass;
-    jobject platformaddress;
+    jclass directBufferClass = NULL;
+    jclass platformaddressClass = NULL;
+    jobject platformaddress = NULL;
     jmethodID onMethod;
+    jobject result = NULL;
 
     directBufferClass = (*env)->FindClass(env, 
             "java/nio/ReadWriteDirectByteBuffer");
 
     if(!directBufferClass)
     {
-        return NULL;
+        goto bail;
     }
 
     newBufferMethod = (*env)->GetMethodID(env, directBufferClass, "<init>",
             "(Lorg/apache/harmony/luni/platform/PlatformAddress;II)V");
     if(!newBufferMethod)
     {
-        return NULL;
+        goto bail;
     }
 
     platformaddressClass = (*env)->FindClass(env, 
             "org/apache/harmony/luni/platform/PlatformAddressFactory");
     if(!platformaddressClass)
     {
-        return NULL;
+        goto bail;
     }
 
     onMethod = (*env)->GetStaticMethodID(env, platformaddressClass, "on",
             "(I)Lorg/apache/harmony/luni/platform/PlatformAddress;");
     if(!onMethod)
     {
-        return NULL;
+        goto bail;
     }
 
-    platformaddress = (*env)->CallStaticObjectMethod(env, platformaddressClass, 
+    platformaddress = (*env)->CallStaticObjectMethod(env, platformaddressClass,
             onMethod, (jint)address);
 
-    return (*env)->NewObject(env, directBufferClass, newBufferMethod, 
+    result = (*env)->NewObject(env, directBufferClass, newBufferMethod, 
             platformaddress, (jint)capacity, (jint)0);
+
+bail:
+    if (directBufferClass != NULL)
+        (*env)->DeleteLocalRef(env, directBufferClass);
+    if (platformaddressClass != NULL)
+        (*env)->DeleteLocalRef(env, platformaddressClass);
+    if (platformaddress != NULL)
+        (*env)->DeleteLocalRef(env, platformaddress);
+    return result;
 }
 
 /*
@@ -2730,43 +2740,53 @@
 static void* GetDirectBufferAddress(JNIEnv * env, jobject buf)
 {
     jmethodID tempMethod;
-    jclass tempClass;
-    jobject platformAddr;
-    jclass platformAddrClass;
+    jclass tempClass = NULL;
+    jobject platformAddr = NULL;
+    jclass platformAddrClass = NULL;
     jmethodID toLongMethod;
+    void* result = NULL;
 
     tempClass = (*env)->FindClass(env, 
             "org/apache/harmony/nio/internal/DirectBuffer");
     if(!tempClass)
     {
-        return 0;
+        goto bail;
     }
 
     if(JNI_FALSE == (*env)->IsInstanceOf(env, buf, tempClass))
     {
-        return 0;
+        goto bail;
     }
 
     tempMethod = (*env)->GetMethodID(env, tempClass, "getBaseAddress",
-             "()Lorg/apache/harmony/luni/platform/PlatformAddress;");        
+             "()Lorg/apache/harmony/luni/platform/PlatformAddress;");
     if(!tempMethod){
-        return 0;
-    }    
+        goto bail;
+    }
     platformAddr = (*env)->CallObjectMethod(env, buf, tempMethod);
     platformAddrClass = (*env)->FindClass (env, 
             "org/apache/harmony/luni/platform/PlatformAddress");
     if(!platformAddrClass)
     {
-        return 0;
+        goto bail;
 
     }
     toLongMethod = (*env)->GetMethodID(env, platformAddrClass, "toLong", "()J");
     if (!toLongMethod)
     {
-        return 0;
+        goto bail;
     }
 
-    return (void*)(u4)(*env)->CallLongMethod(env, platformAddr, toLongMethod);    
+    result = (void*)(u4)(*env)->CallLongMethod(env, platformAddr, toLongMethod);
+
+bail:
+    if (tempClass != NULL)
+        (*env)->DeleteLocalRef(env, tempClass);
+    if (platformAddr != NULL)
+        (*env)->DeleteLocalRef(env, platformAddr);
+    if (platformAddrClass != NULL)
+        (*env)->DeleteLocalRef(env, platformAddrClass);
+    return result;
 }
 
 /*
@@ -2781,34 +2801,42 @@
 static jlong GetDirectBufferCapacity(JNIEnv * env, jobject buf)
 {
     jfieldID fieldCapacity;
-    jclass directBufferClass;
-    jclass bufferClass;
+    jclass directBufferClass = NULL;
+    jclass bufferClass = NULL;
+    jlong result = -1;
 
     directBufferClass = (*env)->FindClass(env,
             "org/apache/harmony/nio/internal/DirectBuffer");
     if (!directBufferClass)
     {
-        return -1;
+        goto bail;
     }
 
     if (JNI_FALSE == (*env)->IsInstanceOf(env, buf, directBufferClass))
     {
-        return -1;
+        goto bail;
     }
 
     bufferClass = (*env)->FindClass(env, "java/nio/Buffer");
     if (!bufferClass)
     {
-        return -1;
+        goto bail;
     }
 
     fieldCapacity = (*env)->GetFieldID(env, bufferClass, "capacity", "I");
     if (!fieldCapacity)
     {
-        return -1;
+        goto bail;
     }
 
-    return (*env)->GetIntField(env, buf, fieldCapacity);
+    result = (*env)->GetIntField(env, buf, fieldCapacity);
+
+bail:
+    if (directBufferClass != NULL)
+        (*env)->DeleteLocalRef(env, directBufferClass);
+    if (bufferClass != NULL)
+        (*env)->DeleteLocalRef(env, bufferClass);
+    return result;
 }
 
 
diff --git a/vm/Misc.h b/vm/Misc.h
index 5f3af7b..bd6fece 100644
--- a/vm/Misc.h
+++ b/vm/Misc.h
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Miscellaneous utility functions.
  */
@@ -143,7 +144,11 @@
  * Print a debug message.
  */
 void dvmPrintDebugMessage(const DebugOutputTarget* target, const char* format,
-    ...);
+    ...)
+#if defined(__GNUC__)
+    __attribute__ ((format(printf, 2, 3)))
+#endif
+    ;
 
 
 /*
@@ -277,6 +282,15 @@
  */
 bool dvmSetCloseOnExec(int fd);
 
+/*
+ * Unconditionally abort the entire VM.  Try not to use this.
+ */
+void dvmAbort(void)
+#if defined(__GNUC__)
+    __attribute__ ((noreturn))
+#endif
+    ;
+
 #if (!HAVE_STRLCPY)
 /* Implementation of strlcpy() for platforms that don't already have it. */
 size_t strlcpy(char *dst, const char *src, size_t size);
diff --git a/vm/Native.c b/vm/Native.c
index 7a153d6..4fb9795 100644
--- a/vm/Native.c
+++ b/vm/Native.c
@@ -411,19 +411,25 @@
      * doesn't have to search through LD_LIBRARY_PATH.  (It may do so to
      * resolve this library's dependencies though.)
      *
-     * Failures here are expected when java.library.path has several entries.
+     * Failures here are expected when java.library.path has several entries
+     * and we have to hunt for the lib.
      *
      * The current android-arm dynamic linker implementation tends to
      * return "Cannot find library" from dlerror() regardless of the actual
-     * problem.  A more useful diagnostic may be sent to stdout/stderr,
-     * but often that's not visible.  Some things to try:
+     * problem.  A more useful diagnostic may be sent to stdout/stderr if
+     * linker diagnostics are enabled, but that's not usually visible in
+     * Android apps.  Some things to try:
      *   - make sure the library exists on the device
      *   - verify that the right path is being opened (the debug log message
      *     above can help with that)
-     *   - check to see if the library is valid
+     *   - check to see if the library is valid (e.g. not zero bytes long)
      *   - check config/prelink-linux-arm.map to ensure that the library
      *     is listed and is not being overrun by the previous entry (if
-     *     loading suddenly stops working, this is a good one to check)
+     *     loading suddenly stops working on a prelinked library, this is
+     *     a good one to check)
+     *   - write a trivial app that calls sleep() then dlopen(), attach
+     *     to it with "strace -p <pid>" while it sleeps, and watch for
+     *     attempts to open nonexistent dependent shared libs
      */
     handle = dlopen(pathName, RTLD_LAZY);
     if (handle == NULL) {
diff --git a/vm/Profile.c b/vm/Profile.c
index 9b47885..f7e6c17 100644
--- a/vm/Profile.c
+++ b/vm/Profile.c
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Android's method call profiling goodies.
  */
@@ -388,6 +389,7 @@
     return;
 
 fail:
+    updateActiveProfilers(-1);
     if (state->traceFile != NULL) {
         fclose(state->traceFile);
         state->traceFile = NULL;
@@ -448,6 +450,15 @@
 }
 
 /*
+ * Returns "true" if method tracing is currently active.
+ */
+bool dvmIsMethodTraceActive(void)
+{
+    const MethodTraceState* state = &gDvm.methodTrace;
+    return state->traceEnabled;
+}
+
+/*
  * Stop method tracing.  We write the buffer to disk and generate a key
  * file so we can interpret it.
  */
@@ -464,6 +475,7 @@
 
     if (!state->traceEnabled) {
         /* somebody already stopped it, or it was never started */
+        LOGD("TRACE stop requested, but not running\n");
         dvmUnlockMutex(&state->startStopLock);
         return;
     } else {
diff --git a/vm/Profile.h b/vm/Profile.h
index f762974..cdaf027 100644
--- a/vm/Profile.h
+++ b/vm/Profile.h
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Android's method call profiling goodies.
  */
@@ -96,6 +97,7 @@
  * Start/stop method tracing.
  */
 void dvmMethodTraceStart(const char* traceFileName, int bufferSize, int flags);
+bool dvmIsMethodTraceActive(void);
 void dvmMethodTraceStop(void);
 
 /*
diff --git a/vm/Thread.c b/vm/Thread.c
index 42b527e..497abaa 100644
--- a/vm/Thread.c
+++ b/vm/Thread.c
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Thread support.
  */
@@ -478,6 +479,20 @@
     assert(cc == 0);
 }
 
+/*
+ * Convert SuspendCause to a string.
+ */
+static const char* getSuspendCauseStr(SuspendCause why)
+{
+    switch (why) {
+    case SUSPEND_NOT:               return "NOT?";
+    case SUSPEND_FOR_GC:            return "gc";
+    case SUSPEND_FOR_DEBUG:         return "debug";
+    case SUSPEND_FOR_DEBUG_EVENT:   return "debug-event";
+    case SUSPEND_FOR_STACK_DUMP:    return "stack-dump";
+    default:                        return "UNKNOWN";
+    }
+}
 
 /*
  * Grab the "thread suspend" lock.  This is required to prevent the
@@ -489,7 +504,6 @@
  */
 static void lockThreadSuspend(const char* who, SuspendCause why)
 {
-    const int kMaxRetries = 10;
     const int kSpinSleepTime = 3*1000*1000;        /* 3s */
     u8 startWhen = 0;       // init req'd to placate gcc
     int sleepIter = 0;
@@ -500,23 +514,30 @@
         if (cc != 0) {
             if (!dvmCheckSuspendPending(NULL)) {
                 /*
-                 * Could be unusual JNI-attach thing, could be we hit
-                 * the window as the suspend or resume was started.  Could
-                 * also be the debugger telling us to resume at roughly
+                 * Could be we hit the window as the suspend or resume
+                 * was started (i.e. the lock has been grabbed but the
+                 * other thread hasn't yet set our "please suspend" flag).
+                 *
+                 * Could be an unusual JNI thread-attach thing.
+                 *
+                 * Could be the debugger telling us to resume at roughly
                  * the same time we're posting an event.
                  */
-                LOGI("threadid=%d ODD: thread-suspend lock held (%s:%d)"
-                     " but suspend not pending\n",
-                    dvmThreadSelf()->threadId, who, why);
+                LOGI("threadid=%d ODD: want thread-suspend lock (%s:%s),"
+                     " it's held, no suspend pending\n",
+                    dvmThreadSelf()->threadId, who, getSuspendCauseStr(why));
+            } else {
+                /* we suspended; reset timeout */
+                sleepIter = 0;
             }
 
             /* give the lock-holder a chance to do some work */
             if (sleepIter == 0)
                 startWhen = dvmGetRelativeTimeUsec();
             if (!dvmIterativeSleep(sleepIter++, kSpinSleepTime, startWhen)) {
-                LOGE("threadid=%d: couldn't get thread-suspend lock (%s:%d),"
+                LOGE("threadid=%d: couldn't get thread-suspend lock (%s:%s),"
                      " bailing\n",
-                    dvmThreadSelf()->threadId, who, why);
+                    dvmThreadSelf()->threadId, who, getSuspendCauseStr(why));
                 dvmDumpAllThreads(false);
                 dvmAbort();
             }
@@ -2179,8 +2200,15 @@
                 &gDvm.threadSuspendCountLock);
         assert(cc == 0);
         if (self->suspendCount != 0) {
-            LOGD("threadid=%d: still suspended after undo (s=%d d=%d)\n",
-                self->threadId, self->suspendCount, self->dbgSuspendCount);
+            /*
+             * The condition was signaled but we're still suspended.  This
+             * can happen if the debugger lets go while a SIGQUIT thread
+             * dump event is pending (assuming SignalCatcher was resumed for
+             * just long enough to try to grab the thread-suspend lock).
+             */
+            LOGD("threadid=%d: still suspended after undo (sc=%d dc=%d s=%c)\n",
+                self->threadId, self->suspendCount, self->dbgSuspendCount,
+                self->isSuspended ? 'Y' : 'N');
         }
     }
     assert(self->suspendCount == 0 && self->dbgSuspendCount == 0);
@@ -2898,9 +2926,9 @@
         threadName, isDaemon ? " daemon" : "",
         priority, thread->threadId, kStatusNames[thread->status]);
     dvmPrintDebugMessage(target,
-        "  | group=\"%s\" sCount=%d dsCount=%d s=%d obj=%p\n",
+        "  | group=\"%s\" sCount=%d dsCount=%d s=%c obj=%p\n",
         groupName, thread->suspendCount, thread->dbgSuspendCount,
-        thread->isSuspended, thread->threadObj);
+        thread->isSuspended ? 'Y' : 'N', thread->threadObj);
     dvmPrintDebugMessage(target,
         "  | sysTid=%d nice=%d sched=%d/%d handle=%d\n",
         thread->systemTid, getpriority(PRIO_PROCESS, thread->systemTid),
@@ -3127,9 +3155,15 @@
  * GC helper functions
  */
 
+/*
+ * Add the contents of the registers from the interpreted call stack.
+ */
 static void gcScanInterpStackReferences(Thread *thread)
 {
     const u4 *framePtr;
+#if WITH_EXTRA_GC_CHECKS > 1
+    bool first = true;
+#endif
 
     framePtr = (const u4 *)thread->curFrame;
     while (framePtr != NULL) {
@@ -3138,27 +3172,184 @@
 
         saveArea = SAVEAREA_FROM_FP(framePtr);
         method = saveArea->method;
-        if (method != NULL) {
+        if (method != NULL && !dvmIsNativeMethod(method)) {
 #ifdef COUNT_PRECISE_METHODS
             /* the GC is running, so no lock required */
-            if (!dvmIsNativeMethod(method)) {
-                if (dvmPointerSetAddEntry(gDvm.preciseMethods, method))
-                    LOGI("Added %s.%s %p\n",
-                        method->clazz->descriptor, method->name, method);
-            }
+            if (dvmPointerSetAddEntry(gDvm.preciseMethods, method))
+                LOGI("PGC: added %s.%s %p\n",
+                    method->clazz->descriptor, method->name, method);
 #endif
-            int i;
-            for (i = method->registersSize - 1; i >= 0; i--) {
-                u4 rval = *framePtr++;
-//TODO: wrap markifobject in a macro that does pointer checks
-                if (rval != 0 && (rval & 0x3) == 0) {
-                    dvmMarkIfObject((Object *)rval);
+#if WITH_EXTRA_GC_CHECKS > 1
+            /*
+             * May also want to enable the memset() in the "invokeMethod"
+             * goto target in the portable interpreter.  That sets the stack
+             * to a pattern that makes referring to uninitialized data
+             * very obvious.
+             */
+
+            if (first) {
+                /*
+                 * First frame, isn't native, check the "alternate" saved PC
+                 * as a sanity check.
+                 *
+                 * It seems like we could check the second frame if the first
+                 * is native, since the PCs should be the same.  It turns out
+                 * this doesn't always work.  The problem is that we could
+                 * have calls in the sequence:
+                 *   interp method #2
+                 *   native method
+                 *   interp method #1
+                 *
+                 * and then GC while in the native method after returning
+                 * from interp method #2.  The currentPc on the stack is
+                 * for interp method #1, but thread->currentPc2 is still
+                 * set for the last thing interp method #2 did.
+                 *
+                 * This can also happen in normal execution:
+                 * - sget-object on not-yet-loaded class
+                 * - class init updates currentPc2
+                 * - static field init is handled by parsing annotations;
+                 *   static String init requires creation of a String object,
+                 *   which can cause a GC
+                 *
+                 * Essentially, any pattern that involves executing
+                 * interpreted code and then causes an allocation without
+                 * executing instructions in the original method will hit
+                 * this.  These are rare enough that the test still has
+                 * some value.
+                 */
+                if (saveArea->xtra.currentPc != thread->currentPc2) {
+                    LOGW("PGC: savedPC(%p) != current PC(%p), %s.%s ins=%p\n",
+                        saveArea->xtra.currentPc, thread->currentPc2,
+                        method->clazz->descriptor, method->name, method->insns);
+                    if (saveArea->xtra.currentPc != NULL)
+                        LOGE("  pc inst = 0x%04x\n", *saveArea->xtra.currentPc);
+                    if (thread->currentPc2 != NULL)
+                        LOGE("  pc2 inst = 0x%04x\n", *thread->currentPc2);
+                    dvmDumpThread(thread, false);
                 }
+            } else {
+                /*
+                 * It's unusual, but not impossible, for a non-first frame
+                 * to be at something other than a method invocation.  For
+                 * example, if we do a new-instance on a nonexistent class,
+                 * we'll have a lot of class loader activity on the stack
+                 * above the frame with the "new" operation.  Could also
+                 * happen while we initialize a Throwable when an instruction
+                 * fails.
+                 *
+                 * So there's not much we can do here to verify the PC,
+                 * except to verify that it's a GC point.
+                 */
+            }
+            assert(saveArea->xtra.currentPc != NULL);
+#endif
+
+            const RegisterMap* pMap;
+            const u1* regVector;
+            int i;
+
+            pMap = method->registerMap;
+            if (pMap != NULL) {
+                /* found map, get registers for this address */
+                int addr = saveArea->xtra.currentPc - method->insns;
+                regVector = dvmGetRegisterMapLine(pMap, addr);
+                if (regVector == NULL) {
+                    LOGW("PGC: map but no entry for %s.%s addr=0x%04x\n",
+                        method->clazz->descriptor, method->name, addr);
+                } else {
+                    LOGV("PGC: found map for %s.%s 0x%04x (t=%d)\n",
+                        method->clazz->descriptor, method->name, addr,
+                        thread->threadId);
+                }
+            } else {
+                /*
+                 * No map found.  If precise GC is disabled this is
+                 * expected -- we don't create pointers to the map data even
+                 * if it's present -- but if it's enabled it means we're
+                 * unexpectedly falling back on a conservative scan, so it's
+                 * worth yelling a little.
+                 *
+                 * TODO: we should be able to remove this for production --
+                 * no need to keep banging on the global.
+                 */
+                if (gDvm.preciseGc) {
+                    LOGI("PGC: no map for %s.%s\n",
+                        method->clazz->descriptor, method->name);
+                }
+                regVector = NULL;
+            }
+
+            if (regVector == NULL) {
+                /* conservative scan */
+                for (i = method->registersSize - 1; i >= 0; i--) {
+                    u4 rval = *framePtr++;
+                    if (rval != 0 && (rval & 0x3) == 0) {
+                        dvmMarkIfObject((Object *)rval);
+                    }
+                }
+            } else {
+                /*
+                 * Precise scan.  v0 is at the lowest address on the
+                 * interpreted stack, and is the first bit in the register
+                 * vector, so we can walk through the register map and
+                 * memory in the same direction.
+                 *
+                 * A '1' bit indicates a live reference.
+                 */
+                u2 bits = 1 << 1;
+                for (i = method->registersSize - 1; i >= 0; i--) {
+                    u4 rval = *framePtr++;
+
+                    bits >>= 1;
+                    if (bits == 1) {
+                        /* set bit 8 (0x0100) so we can tell when we're empty */
+                        bits = *regVector++ | 0x0100;
+                        LOGVV("loaded bits: 0x%02x\n", bits & 0xff);
+                    }
+
+                    if (rval != 0 && (bits & 0x01) != 0) {
+                        /*
+                         * Non-null, register marked as live reference.  This
+                         * should always be a valid object.
+                         */
+#if WITH_EXTRA_GC_CHECKS > 0
+                        if ((rval & 0x3) != 0 ||
+                            !dvmIsValidObject((Object*) rval))
+                        {
+                            /* this is very bad */
+                            LOGE("PGC: invalid ref in reg %d: 0x%08x\n",
+                                method->registersSize-1 - i, rval);
+                        } else
+#endif
+                        {
+                            dvmMarkObjectNonNull((Object *)rval);
+                        }
+                    } else {
+                        /*
+                         * Null or non-reference, do nothing at all.
+                         */
+#if WITH_EXTRA_GC_CHECKS > 1
+                        if (dvmIsValidObject((Object*) rval)) {
+                            /* this is normal, but we feel chatty */
+                            LOGD("PGC: ignoring valid ref in reg %d: 0x%08x\n",
+                                method->registersSize-1 - i, rval);
+                        }
+#endif
+                    }
+                }
+                dvmReleaseRegisterMapLine(pMap, regVector);
             }
         }
-        /* else this is a break frame; nothing to mark.
+        /* else this is a break frame and there is nothing to mark, or
+         * this is a native method and the registers are just the "ins",
+         * copied from various registers in the caller's set.
          */
 
+#if WITH_EXTRA_GC_CHECKS > 1
+        first = false;
+#endif
+
         /* Don't fall into an infinite loop if things get corrupted.
          */
         assert((uintptr_t)saveArea->prevFrame > (uintptr_t)framePtr ||
diff --git a/vm/Thread.h b/vm/Thread.h
index b64f9b7..f0b5541 100644
--- a/vm/Thread.h
+++ b/vm/Thread.h
@@ -203,6 +203,11 @@
 #ifdef WITH_JNI_STACK_CHECK
     u4          stackCrc;
 #endif
+
+#if WITH_EXTRA_GC_CHECKS > 1
+    /* PC, saved on every instruction; redundant with StackSaveArea */
+    const u2*   currentPc2;
+#endif
 } Thread;
 
 /* start point for an internal thread; mimics pthread args */
@@ -277,12 +282,11 @@
 bool dvmCheckSuspendPending(Thread* self);
 
 /*
- * Fast test for use in the interpreter.  If our suspend count is nonzero,
- * do a more rigorous evaluation.
+ * Fast test for use in the interpreter.  Returns "true" if our suspend
+ * count is nonzero.
  */
-INLINE void dvmCheckSuspendQuick(Thread* self) {
-    if (self->suspendCount != 0)
-        dvmCheckSuspendPending(self);
+INLINE bool dvmCheckSuspendQuick(Thread* self) {
+    return (self->suspendCount != 0);
 }
 
 /*
diff --git a/vm/alloc/Alloc.h b/vm/alloc/Alloc.h
index 0489db7..8bf4520 100644
--- a/vm/alloc/Alloc.h
+++ b/vm/alloc/Alloc.h
@@ -133,7 +133,7 @@
     }
 #ifdef WITH_EXTRA_OBJECT_VALIDATION
     if (!dvmIsValidObject(obj)) {
-        //abort();
+        //dvmAbort();
         dvmThrowException("Ljava/lang/InternalError;",
             "VM detected invalid object ptr");
         return false;
@@ -142,7 +142,7 @@
 #ifndef NDEBUG
     /* check for heap corruption */
     if (obj->clazz == NULL || ((u4) obj->clazz) <= 65536) {
-        abort();
+        dvmAbort();
         dvmThrowException("Ljava/lang/InternalError;",
             "VM detected invalid object class ptr");
         return false;
diff --git a/vm/alloc/Heap.c b/vm/alloc/Heap.c
index 9ddc8be..6f3c7c1 100644
--- a/vm/alloc/Heap.c
+++ b/vm/alloc/Heap.c
@@ -174,6 +174,7 @@
         if (self != NULL) {
             oldStatus = dvmChangeStatus(self, THREAD_VMWAIT);
         } else {
+            LOGI("ODD: waiting on heap lock, no self\n");
             oldStatus = -1; // shut up gcc
         }
 
@@ -831,6 +832,8 @@
     if (gcHeap->hprofDumpOnGc) {
         char nameBuf[128];
 
+        gcHeap->hprofResult = -1;
+
         if (gcHeap->hprofFileName == NULL) {
             /* no filename was provided; invent one */
             sprintf(nameBuf, "/data/misc/heap-dump-tm%d-pid%d.hprof",
@@ -860,7 +863,10 @@
 
     /* Set up the marking context.
      */
-    dvmHeapBeginMarkStep();
+    if (!dvmHeapBeginMarkStep()) {
+        LOGE_HEAP("dvmHeapBeginMarkStep failed; aborting\n");
+        dvmAbort();
+    }
 
     /* Mark the set of objects that are strongly reachable from the roots.
      */
@@ -982,7 +988,8 @@
     if (gcHeap->hprofContext != NULL) {
         hprofFinishHeapDump(gcHeap->hprofContext);
 //TODO: write a HEAP_SUMMARY record
-        hprofShutdown(gcHeap->hprofContext);
+        if (hprofShutdown(gcHeap->hprofContext))
+            gcHeap->hprofResult = 0;    /* indicate success */
         gcHeap->hprofContext = NULL;
     }
 #endif
@@ -1046,16 +1053,23 @@
  * Perform garbage collection, writing heap information to the specified file.
  *
  * If "fileName" is NULL, a suitable name will be generated automatically.
+ *
+ * Returns 0 on success, or an error code on failure.
  */
-void hprofDumpHeap(const char* fileName)
+int hprofDumpHeap(const char* fileName)
 {
+    int result;
+
     dvmLockMutex(&gDvm.gcHeapLock);
 
     gDvm.gcHeap->hprofDumpOnGc = true;
     gDvm.gcHeap->hprofFileName = fileName;
     dvmCollectGarbageInternal(false);
+    result = gDvm.gcHeap->hprofResult;
 
     dvmUnlockMutex(&gDvm.gcHeapLock);
+
+    return result;
 }
 
 void dvmHeapSetHprofGcScanState(hprof_heap_tag_t state, u4 threadSerialNumber)
diff --git a/vm/alloc/HeapInternal.h b/vm/alloc/HeapInternal.h
index 7851983..fafb87a 100644
--- a/vm/alloc/HeapInternal.h
+++ b/vm/alloc/HeapInternal.h
@@ -189,6 +189,7 @@
     bool            hprofDumpOnGc;
     const char*     hprofFileName;
     hprof_context_t *hprofContext;
+    int             hprofResult;
 #endif
 };
 
diff --git a/vm/alloc/HeapWorker.c b/vm/alloc/HeapWorker.c
index 0244cca..b4a2d0e 100644
--- a/vm/alloc/HeapWorker.c
+++ b/vm/alloc/HeapWorker.c
@@ -111,7 +111,7 @@
 }
 
 /* Make sure that the HeapWorker thread hasn't spent an inordinate
- * amount of time inside interpreted a finalizer.
+ * amount of time inside a finalizer.
  *
  * Aborts the VM if the thread appears to be wedged.
  *
@@ -132,12 +132,16 @@
         u8 nowCpu = dvmGetOtherThreadCpuTimeUsec(gDvm.heapWorkerHandle);
         u8 deltaCpu = nowCpu - heapWorkerInterpCpuStartTime;
 
-        if (delta > HEAP_WORKER_WATCHDOG_TIMEOUT && gDvm.debuggerActive) {
+        if (delta > HEAP_WORKER_WATCHDOG_TIMEOUT &&
+            (gDvm.debuggerActive || gDvm.nativeDebuggerActive))
+        {
             /*
              * Debugger suspension can block the thread indefinitely.  For
              * best results we should reset this explicitly whenever the
-             * HeapWorker thread is resumed.  Ignoring the yelp isn't
-             * quite right but will do for a quick fix.
+             * HeapWorker thread is resumed.  Unfortunately this is also
+             * affected by native debuggers, and we have no visibility
+             * into how they're manipulating us.  So, we ignore the
+             * watchdog and just reset the timer.
              */
             LOGI("Debugger is attached -- suppressing HeapWorker watchdog\n");
             heapWorkerInterpStartTime = now;        /* reset timer */
diff --git a/vm/alloc/MarkSweep.c b/vm/alloc/MarkSweep.c
index a0601d7..0905bce 100644
--- a/vm/alloc/MarkSweep.c
+++ b/vm/alloc/MarkSweep.c
@@ -22,6 +22,7 @@
 #include <limits.h>     // for ULONG_MAX
 #include <sys/mman.h>   // for madvise(), mmap()
 #include <cutils/ashmem.h>
+#include <errno.h>
 
 #define GC_DEBUG_PARANOID   2
 #define GC_DEBUG_BASIC      1
@@ -92,7 +93,7 @@
 {
     const Object **limit;
     size_t size;
-    int fd;
+    int fd, err;
 
     /* Create a stack big enough for the worst possible case,
      * where the heap is perfectly full of the smallest object.
@@ -104,14 +105,17 @@
     size = ALIGN_UP_TO_PAGE_SIZE(size);
     fd = ashmem_create_region("dalvik-heap-markstack", size);
     if (fd < 0) {
-        LOGE_GC("Could not create %d-byte ashmem mark stack\n", size);
+        LOGE_GC("Could not create %d-byte ashmem mark stack: %s\n",
+            size, strerror(errno));
         return false;
     }
     limit = (const Object **)mmap(NULL, size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE, fd, 0);
+    err = errno;
     close(fd);
     if (limit == MAP_FAILED) {
-        LOGE_GC("Could not mmap %d-byte ashmem mark stack\n", size);
+        LOGE_GC("Could not mmap %d-byte ashmem mark stack: %s\n",
+            size, strerror(err));
         return false;
     }
 
diff --git a/vm/analysis/CodeVerify.c b/vm/analysis/CodeVerify.c
index 65aa833..ed132e2 100644
--- a/vm/analysis/CodeVerify.c
+++ b/vm/analysis/CodeVerify.c
@@ -72,9 +72,9 @@
 static inline bool doVerboseLogging(const Method* meth) {
     return false;       /* COMMENT OUT to enable verbose debugging */
 
-    const char* cd = "Lop_lshr;";
-    const char* mn = "test";
-    const char* sg = "(II)J";
+    const char* cd = "Landroid/net/http/Request;";
+    const char* mn = "readResponse";
+    const char* sg = "(Landroid/net/http/AndroidHttpClientConnection;)V";
     return (strcmp(meth->clazz->descriptor, cd) == 0 &&
             dvmCompareNameDescriptorAndMethod(mn, sg, meth) == 0);
 }
@@ -327,7 +327,7 @@
  */
 static RegType primitiveTypeToRegType(PrimitiveType primType)
 {
-    struct {
+    static const struct {
         RegType         regType;        /* type equivalent */
         PrimitiveType   primType;       /* verification */
     } convTab[] = {
@@ -5173,13 +5173,11 @@
             updateRegisters(meth, insnFlags, regTable, insnIdx+insnWidth,
                 workRegs);
         } else {
-            /* if not yet visited, or regs were updated, set "changed" */
-            if (!dvmInsnIsVisited(insnFlags, insnIdx+insnWidth) ||
-                compareRegisters(workRegs, entryRegs,
-                    insnRegCount + kExtraRegs) != 0)
-            {
-                dvmInsnSetChanged(insnFlags, insnIdx+insnWidth, true);
-            }
+            /*
+             * We didn't record register data for the next entry, so we have
+             * to assume that something has changed and re-evaluate it.
+             */
+            dvmInsnSetChanged(insnFlags, insnIdx+insnWidth, true);
         }
     }
 
@@ -5209,6 +5207,7 @@
         if (!checkMoveException(meth, insnIdx+branchTarget, "branch"))
             goto bail;
 
+        /* update branch target, set "changed" if appropriate */
         updateRegisters(meth, insnFlags, regTable, insnIdx+branchTarget,
             workRegs);
     }
diff --git a/vm/analysis/CodeVerify.h b/vm/analysis/CodeVerify.h
index 0cd4638..55fe41c 100644
--- a/vm/analysis/CodeVerify.h
+++ b/vm/analysis/CodeVerify.h
@@ -198,10 +198,10 @@
     return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
 }
 INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
-    bool isBranch)
+    bool isGcPoint)
 {
-    assert(isBranch);
-    //if (isBranch)
+    assert(isGcPoint);
+    //if (isGcPoint)
         insnFlags[addr] |= kInsnFlagGcPoint;
     //else
     //    insnFlags[addr] &= ~kInsnFlagGcPoint;
diff --git a/vm/analysis/DexOptimize.c b/vm/analysis/DexOptimize.c
index d086b99..70b3278 100644
--- a/vm/analysis/DexOptimize.c
+++ b/vm/analysis/DexOptimize.c
@@ -25,6 +25,7 @@
 #include "Dalvik.h"
 #include "libdex/InstrUtils.h"
 #include "libdex/OptInvocation.h"
+#include "analysis/RegisterMap.h"
 
 #include <zlib.h>
 
@@ -50,7 +51,7 @@
 /* fwd */
 static int writeDependencies(int fd, u4 modWhen, u4 crc);
 static bool writeAuxData(int fd, const DexClassLookup* pClassLookup,\
-    const IndexMapSet* pIndexMapSet);
+    const IndexMapSet* pIndexMapSet, const RegisterMapBuilder* pRegMapBuilder);
 static void logFailedWrite(size_t expected, ssize_t actual, const char* msg,
     int err);
 
@@ -506,6 +507,7 @@
 {
     DexClassLookup* pClassLookup = NULL;
     IndexMapSet* pIndexMapSet = NULL;
+    RegisterMapBuilder* pRegMapBuilder = NULL;
     bool doVerify, doOpt;
     u4 headerFlags = 0;
 
@@ -566,6 +568,13 @@
          * Rewrite the file.  Byte reordering, structure realigning,
          * class verification, and bytecode optimization are all performed
          * here.
+         *
+         * In theory the file could change size and bits could shift around.
+         * In practice this would be annoying to deal with, so the file
+         * layout is designed so that it can always be rewritten in place.
+         *
+         * This sets "headerFlags" and creates the class lookup table as
+         * part of doing the processing.
          */
         success = rewriteDex(((u1*) mapAddr) + dexOffset, dexLength,
                     doVerify, doOpt, &headerFlags, &pClassLookup);
@@ -576,6 +585,7 @@
 
             if (dvmDexFileOpenPartial(dexAddr, dexLength, &pDvmDex) != 0) {
                 LOGE("Unable to create DexFile\n");
+                success = false;
             } else {
                 /*
                  * If configured to do so, scan the instructions, looking
@@ -586,6 +596,18 @@
                  */
                 pIndexMapSet = dvmRewriteConstants(pDvmDex);
 
+                /*
+                 * If configured to do so, generate a full set of register
+                 * maps for all verified classes.
+                 */
+                if (gDvm.generateRegisterMaps) {
+                    pRegMapBuilder = dvmGenerateRegisterMaps(pDvmDex);
+                    if (pRegMapBuilder == NULL) {
+                        LOGE("Failed generating register maps\n");
+                        success = false;
+                    }
+                }
+
                 updateChecksum(dexAddr, dexLength,
                     (DexHeader*) pDvmDex->pHeader);
 
@@ -640,8 +662,7 @@
         goto bail;
     }
 
-
-    /* compute deps length, and adjust aux start for 64-bit alignment */
+    /* compute deps length, then adjust aux start for 64-bit alignment */
     auxOffset = lseek(fd, 0, SEEK_END);
     depsLength = auxOffset - depsOffset;
 
@@ -656,7 +677,7 @@
     /*
      * Append any auxillary pre-computed data structures.
      */
-    if (!writeAuxData(fd, pClassLookup, pIndexMapSet)) {
+    if (!writeAuxData(fd, pClassLookup, pIndexMapSet, pRegMapBuilder)) {
         LOGW("Failed writing aux data\n");
         goto bail;
     }
@@ -692,8 +713,11 @@
     LOGV("Successfully wrote DEX header\n");
     result = true;
 
+    //dvmRegisterMapDumpStats();
+
 bail:
     dvmFreeIndexMapSet(pIndexMapSet);
+    dvmFreeRegisterMapBuilder(pRegMapBuilder);
     free(pClassLookup);
     return result;
 }
@@ -1085,19 +1109,28 @@
  * so it can be used directly when the file is mapped for reading.
  */
 static bool writeAuxData(int fd, const DexClassLookup* pClassLookup,
-    const IndexMapSet* pIndexMapSet)
+    const IndexMapSet* pIndexMapSet, const RegisterMapBuilder* pRegMapBuilder)
 {
     /* pre-computed class lookup hash table */
-    if (!writeChunk(fd, (u4) kDexChunkClassLookup, pClassLookup,
-            pClassLookup->size))
+    if (!writeChunk(fd, (u4) kDexChunkClassLookup,
+            pClassLookup, pClassLookup->size))
     {
         return false;
     }
 
     /* remapped constants (optional) */
     if (pIndexMapSet != NULL) {
-        if (!writeChunk(fd, pIndexMapSet->chunkType, pIndexMapSet->chunkData,
-                pIndexMapSet->chunkDataLen))
+        if (!writeChunk(fd, pIndexMapSet->chunkType,
+                pIndexMapSet->chunkData, pIndexMapSet->chunkDataLen))
+        {
+            return false;
+        }
+    }
+
+    /* register maps (optional) */
+    if (pRegMapBuilder != NULL) {
+        if (!writeChunk(fd, (u4) kDexChunkRegisterMaps,
+                pRegMapBuilder->data, pRegMapBuilder->size))
         {
             return false;
         }
diff --git a/vm/analysis/DexOptimize.h b/vm/analysis/DexOptimize.h
index 01aa828..4b6ab6b 100644
--- a/vm/analysis/DexOptimize.h
+++ b/vm/analysis/DexOptimize.h
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * DEX optimization declarations.
  */
diff --git a/vm/analysis/DexVerify.c b/vm/analysis/DexVerify.c
index 354d68f..84f34c8 100644
--- a/vm/analysis/DexVerify.c
+++ b/vm/analysis/DexVerify.c
@@ -533,15 +533,36 @@
     dvmInsnSetBranchTarget(insnFlags, 0, true);
 
     for (i = 0; i < insnCount; /**/) {
-        static int gcMask = kInstrCanBranch | kInstrCanSwitch |
+        /*
+         * These types of instructions can be GC points.  To support precise
+         * GC, all such instructions must export the PC in the interpreter,
+         * or the GC won't be able to identify the current PC for the thread.
+         */
+        static const int gcMask = kInstrCanBranch | kInstrCanSwitch |
             kInstrCanThrow | kInstrCanReturn;
+
         int width = dvmInsnGetWidth(insnFlags, i);
         OpCode opcode = *insns & 0xff;
         InstructionFlags opFlags = dexGetInstrFlags(gDvm.instrFlags, opcode);
         int offset, absOffset;
 
-        if ((opFlags & gcMask) != 0)
-            dvmInsnSetGcPoint(insnFlags, i, true);
+        if ((opFlags & gcMask) != 0) {
+            /*
+             * This instruction is probably a GC point.  Branch instructions
+             * only qualify if they go backward, so we need to check the
+             * offset.
+             */
+            int offset = -1;
+            bool unused;
+            if (dvmGetBranchTarget(meth, insnFlags, i, &offset, &unused)) {
+                if (offset < 0) {
+                    dvmInsnSetGcPoint(insnFlags, i, true);
+                }
+            } else {
+                /* no branch offset available; treat as a GC point */
+                dvmInsnSetGcPoint(insnFlags, i, true);
+            }
+        }
 
         switch (opcode) {
         case OP_NOP:
diff --git a/vm/analysis/RegisterMap.c b/vm/analysis/RegisterMap.c
index b02874a..f783f88 100644
--- a/vm/analysis/RegisterMap.c
+++ b/vm/analysis/RegisterMap.c
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-// ** UNDER CONSTRUCTION **
-
 /*
  * This code generate "register maps" for Dalvik bytecode.  In a stack-based
  * VM we might call these "stack maps".  They are used to increase the
@@ -32,7 +30,7 @@
 
 
 /*
-Notes on just-in-time RegisterMap generation
+Notes on just-in-time RegisterMap generation [not supported]
 
 Generating RegisterMap tables as part of verification is convenient because
 we generate most of what we need to know as part of doing the verify.
@@ -85,6 +83,127 @@
 // fwd
 static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data);
 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap);
+static void computeMapStats(RegisterMap* pMap, const Method* method);
+
+
+//#define REGISTER_MAP_STATS
+#ifdef REGISTER_MAP_STATS
+/*
+ * Generate some statistics on the register maps we generate.
+ */
+#define kMaxGcPointGap      50
+#define kUpdatePosnMinRegs  24
+#define kNumUpdatePosns     8
+#define kMaxDiffBits        20
+typedef struct MapStats {
+    /*
+     * Buckets measuring the distance between GC points.  This tells us how
+     * many bits we need to encode the advancing program counter.  We ignore
+     * some of the "long tail" entries.
+     */
+    int gcPointGap[kMaxGcPointGap];
+
+    /*
+     * Number of gaps.  Equal to (number of gcPoints - number of methods),
+     * since the computation isn't including the initial gap.
+     */
+    int gcGapCount;
+
+    /*
+     * Total number of GC points.
+     */
+    int totalGcPointCount;
+
+    /*
+     * For larger methods (>= 24 registers), measure in which octant register
+     * updates occur.  This should help us understand whether register
+     * changes tend to cluster in the low regs even for large methods.
+     */
+    int updatePosn[kNumUpdatePosns];
+
+    /*
+     * For all methods, count up the number of changes to registers < 16
+     * and >= 16.
+     */
+    int updateLT16;
+    int updateGE16;
+
+    /*
+     * Histogram of the number of bits that differ between adjacent entries.
+     */
+    int numDiffBits[kMaxDiffBits];
+} MapStats;
+#endif
+
+/*
+ * Allocate the zeroed stats block (REGISTER_MAP_STATS only); false if OOM.
+ */
+bool dvmRegisterMapStartup(void)
+{
+#ifdef REGISTER_MAP_STATS
+    gDvm.registerMapStats = calloc(1, sizeof(MapStats));
+    if (gDvm.registerMapStats == NULL) return false;
+#endif
+    return true;
+}
+
+/*
+ * Clean up.
+ */
+void dvmRegisterMapShutdown(void)
+{
+#ifdef REGISTER_MAP_STATS
+    free(gDvm.registerMapStats);
+#endif
+}
+
+/*
+ * Write stats to log file.
+ */
+void dvmRegisterMapDumpStats(void)
+{
+#ifdef REGISTER_MAP_STATS
+    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
+    int i, end;
+
+    for (end = kMaxGcPointGap-1; end >= 0; end--) {
+        if (pStats->gcPointGap[end] != 0)
+            break;
+    }
+
+    LOGI("Register Map gcPointGap stats (diff count=%d, total=%d):\n",
+        pStats->gcGapCount, pStats->totalGcPointCount);
+    assert(pStats->gcPointGap[0] == 0);
+    for (i = 1; i <= end; i++) {
+        LOGI(" %2d %d\n", i, pStats->gcPointGap[i]);
+    }
+
+
+    for (end = kMaxDiffBits-1; end >= 0; end--) {
+        if (pStats->numDiffBits[end] != 0)
+            break;
+    }
+
+    LOGI("Register Map bit difference stats:\n");
+    for (i = 0; i <= end; i++) {
+        LOGI(" %2d %d\n", i, pStats->numDiffBits[i]);
+    }
+
+
+    LOGI("Register Map update position stats (lt16=%d ge16=%d):\n",
+        pStats->updateLT16, pStats->updateGE16);
+    for (i = 0; i < kNumUpdatePosns; i++) {
+        LOGI(" %2d %d\n", i, pStats->updatePosn[i]);
+    }
+#endif
+}
+
+
+/*
+ * ===========================================================================
+ *      Map generation
+ * ===========================================================================
+ */
 
 /*
  * Generate the register map for a method that has just been verified
@@ -105,12 +224,18 @@
     int i, bytesForAddr, gcPointCount;
     int bufSize;
 
+    if (vdata->method->registersSize >= 2048) {
+        LOGE("ERROR: register map can't handle %d registers\n",
+            vdata->method->registersSize);
+        goto bail;
+    }
     regWidth = (vdata->method->registersSize + 7) / 8;
+
     if (vdata->insnsSize < 256) {
-        format = kFormatCompact8;
+        format = kRegMapFormatCompact8;
         bytesForAddr = 1;
     } else {
-        format = kFormatCompact16;
+        format = kRegMapFormatCompact16;
         bytesForAddr = 2;
     }
 
@@ -138,14 +263,14 @@
     bufSize = offsetof(RegisterMap, data);
     bufSize += gcPointCount * (bytesForAddr + regWidth);
 
-    LOGD("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)\n",
+    LOGV("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)\n",
         vdata->method->clazz->descriptor, vdata->method->name,
         bytesForAddr, gcPointCount, regWidth, bufSize);
 
     pMap = (RegisterMap*) malloc(bufSize);
-    pMap->format = format;
+    pMap->format = format | kRegMapFormatOnHeap;
     pMap->regWidth = regWidth;
-    pMap->numEntries = gcPointCount;
+    dvmRegisterMapSetNumEntries(pMap, gcPointCount);
 
     /*
      * Populate it.
@@ -154,9 +279,9 @@
     for (i = 0; i < vdata->insnsSize; i++) {
         if (dvmInsnIsGcPoint(vdata->insnFlags, i)) {
             assert(vdata->addrRegs[i] != NULL);
-            if (format == kFormatCompact8) {
+            if (format == kRegMapFormatCompact8) {
                 *mapData++ = i;
-            } else /*kFormatCompact16*/ {
+            } else /*kRegMapFormatCompact16*/ {
                 *mapData++ = i & 0xff;
                 *mapData++ = i >> 8;
             }
@@ -165,13 +290,16 @@
         }
     }
 
-    LOGI("mapData=%p pMap=%p bufSize=%d\n", mapData, pMap, bufSize);
+    LOGV("mapData=%p pMap=%p bufSize=%d\n", mapData, pMap, bufSize);
     assert(mapData - (const u1*) pMap == bufSize);
 
 #if 1
     if (!verifyMap(vdata, pMap))
         goto bail;
 #endif
+#ifdef REGISTER_MAP_STATS
+    computeMapStats(pMap, vdata->method);
+#endif
 
     pResult = pMap;
 
@@ -187,6 +315,7 @@
     if (pMap == NULL)
         return;
 
+    assert(dvmGetRegisterMapOnHeap(pMap));
     free(pMap);
 }
 
@@ -235,36 +364,64 @@
  * Double-check the map.
  *
  * We run through all of the data in the map, and compare it to the original.
+ * Only works on uncompressed data.
  */
 static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap)
 {
-    const u1* data = pMap->data;
+    const u1* rawMap = pMap->data;
+    const u1 format = dvmGetRegisterMapFormat(pMap);
+    const u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
     int ent;
+    bool dumpMap = false;
 
-    for (ent = 0; ent < pMap->numEntries; ent++) {
+    if (false) {
+        const char* cd = "Landroid/net/http/Request;";
+        const char* mn = "readResponse";
+        const char* sg = "(Landroid/net/http/AndroidHttpClientConnection;)V";
+        if (strcmp(vdata->method->clazz->descriptor, cd) == 0 &&
+            strcmp(vdata->method->name, mn) == 0)
+        {
+            char* desc;
+            desc = dexProtoCopyMethodDescriptor(&vdata->method->prototype);
+            LOGI("Map for %s.%s %s\n", vdata->method->clazz->descriptor,
+                vdata->method->name, desc);
+            free(desc);
+
+            dumpMap = true;
+        }
+    }
+
+    if ((vdata->method->registersSize + 7) / 8 != pMap->regWidth) {
+        LOGE("GLITCH: registersSize=%d, regWidth=%d\n",
+            vdata->method->registersSize, pMap->regWidth);
+        return false;
+    }
+
+    for (ent = 0; ent < numEntries; ent++) {
         int addr;
 
-        switch (pMap->format) {
-        case kFormatCompact8:
-            addr = *data++;
+        switch (format) {
+        case kRegMapFormatCompact8:
+            addr = *rawMap++;
             break;
-        case kFormatCompact16:
-            addr = *data++;
-            addr |= (*data++) << 8;
+        case kRegMapFormatCompact16:
+            addr = *rawMap++;
+            addr |= (*rawMap++) << 8;
             break;
         default:
             /* shouldn't happen */
-            LOGE("GLITCH: bad format (%d)", pMap->format);
+            LOGE("GLITCH: bad format (%d)", format);
             dvmAbort();
         }
 
+        const u1* dataStart = rawMap;
         const RegType* regs = vdata->addrRegs[addr];
         if (regs == NULL) {
             LOGE("GLITCH: addr %d has no data\n", addr);
             return false;
         }
 
-        u1 val;
+        u1 val = 0;
         int i;
 
         for (i = 0; i < vdata->method->registersSize; i++) {
@@ -273,7 +430,7 @@
             val >>= 1;
             if ((i & 0x07) == 0) {
                 /* load next byte of data */
-                val = *data++;
+                val = *rawMap++;
             }
 
             bitIsRef = val & 0x01;
@@ -288,9 +445,12 @@
             }
         }
 
+        /* rawMap now points to the address field of the next entry */
+
         /* print the map as a binary string */
-        if (false) {
+        if (dumpMap) {
             char outBuf[vdata->method->registersSize +1];
+            char hexBuf[((vdata->method->registersSize + 7) / 8) * 3 +1];
             for (i = 0; i < vdata->method->registersSize; i++) {
                 if (isReferenceType(regs[i])) {
                     outBuf[i] = '1';
@@ -299,7 +459,15 @@
                 }
             }
             outBuf[i] = '\0';
-            LOGD("  %04d %s\n", addr, outBuf);
+
+            char* cp = hexBuf;
+            const u1* altData = dataStart;
+            for (i = 0; i < (vdata->method->registersSize + 7) / 8; i++) {
+                sprintf(cp, " %02x", *altData++);
+                cp += 3;
+            }
+            hexBuf[i * 3] = '\0';
+            LOGD("  %04x %s %s\n", addr, outBuf, hexBuf);
         }
     }
 
@@ -309,6 +477,655 @@
 
 /*
  * ===========================================================================
+ *      DEX generation & parsing
+ * ===========================================================================
+ */
+
+/*
+ * Round "ptr" up to the next 4-byte boundary; an aligned "ptr" is unchanged.
+ */
+static inline u1* align32(u1* ptr)
+{
+    return (u1*) (((uintptr_t) ptr + 3) & ~(uintptr_t) 0x03);
+}
+
+/*
+ * Compute the size, in bytes, of a register map, based on its format.
+ */
+static size_t computeRegisterMapSize(const RegisterMap* pMap)
+{
+    assert(pMap != NULL);       /* must precede the accessor calls below */
+
+    static const int kHeaderSize = offsetof(RegisterMap, data);
+    u1 format = dvmGetRegisterMapFormat(pMap);
+    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
+
+    switch (format) {
+    case kRegMapFormatNone:
+        return 1;               /* a "none" map is just the format byte */
+    case kRegMapFormatCompact8:
+        return kHeaderSize + (1 + pMap->regWidth) * numEntries;
+    case kRegMapFormatCompact16:
+        return kHeaderSize + (2 + pMap->regWidth) * numEntries;
+    default:
+        LOGE("Bad register map format %d\n", format);
+        dvmAbort();
+        return 0;
+    }
+}
+
+/*
+ * Output the map for a single method, if it has one.
+ *
+ * Abstract and native methods have no map.  All others are expected to
+ * have one, since we know the class verified successfully.
+ *
+ * This strips the "allocated on heap" flag from the format byte, so that
+ * direct-mapped maps are correctly identified as such.
+ */
+static bool writeMapForMethod(const Method* meth, u1** pPtr)
+{
+    if (meth->registerMap == NULL) {
+        if (!dvmIsAbstractMethod(meth) && !dvmIsNativeMethod(meth)) {
+            LOGW("Warning: no map available for %s.%s\n",
+                meth->clazz->descriptor, meth->name);
+            /* weird, but keep going */
+        }
+        *(*pPtr)++ = kRegMapFormatNone;
+        return true;
+    }
+
+    /* serialize map into the buffer */
+    size_t mapSize = computeRegisterMapSize(meth->registerMap);
+    memcpy(*pPtr, meth->registerMap, mapSize);
+
+    /* strip the "on heap" flag out of the format byte, which is always first */
+    assert(**pPtr == meth->registerMap->format);
+    **pPtr &= ~(kRegMapFormatOnHeap);
+
+    *pPtr += mapSize;
+
+    return true;
+}
+
+/*
+ * Write maps for all methods in the specified class to the buffer, which
+ * can hold at most "length" bytes.  "*pPtr" will be advanced past the end
+ * of the data we write.
+ */
+static bool writeMapsAllMethods(DvmDex* pDvmDex, const ClassObject* clazz,
+    u1** pPtr, size_t length)
+{
+    RegisterMapMethodPool* pMethodPool;
+    u1* ptr = *pPtr;
+    int i, methodCount;
+
+    /* artificial limit */
+    if (clazz->virtualMethodCount + clazz->directMethodCount >= 65536) {
+        LOGE("Too many methods in %s\n", clazz->descriptor);
+        return false;
+    }
+
+    pMethodPool = (RegisterMapMethodPool*) ptr;
+    ptr += offsetof(RegisterMapMethodPool, methodData);
+    methodCount = 0;
+
+    /*
+     * Run through all methods, direct then virtual.  The class loader will
+     * traverse them in the same order.  (We could split them into two
+     * distinct pieces, but there doesn't appear to be any value in doing
+     * so other than that it makes class loading slightly less fragile.)
+     *
+     * The class loader won't know about miranda methods at the point
+     * where it parses this, so we omit those.
+     *
+     * TODO: consider omitting all native/abstract definitions.  Should be
+     * safe, though we lose the ability to sanity-check against the
+     * method counts in the DEX file.
+     */
+    for (i = 0; i < clazz->directMethodCount; i++) {
+        const Method* meth = &clazz->directMethods[i];
+        if (dvmIsMirandaMethod(meth))
+            continue;
+        if (!writeMapForMethod(&clazz->directMethods[i], &ptr)) {
+            return false;
+        }
+        methodCount++;
+        //ptr = align32(ptr);
+    }
+
+    for (i = 0; i < clazz->virtualMethodCount; i++) {
+        const Method* meth = &clazz->virtualMethods[i];
+        if (dvmIsMirandaMethod(meth))
+            continue;
+        if (!writeMapForMethod(&clazz->virtualMethods[i], &ptr)) {
+            return false;
+        }
+        methodCount++;
+        //ptr = align32(ptr);
+    }
+
+    pMethodPool->methodCount = methodCount;
+
+    *pPtr = ptr;
+    return true;
+}
+
+/*
+ * Write maps for all classes to the specified buffer, which can hold at
+ * most "length" bytes.
+ *
+ * Returns the actual length used, or 0 on failure.
+ */
+static size_t writeMapsAllClasses(DvmDex* pDvmDex, u1* basePtr, size_t length)
+{
+    DexFile* pDexFile = pDvmDex->pDexFile;
+    u4 count = pDexFile->pHeader->classDefsSize;
+    RegisterMapClassPool* pClassPool;
+    u4* offsetTable;
+    u1* ptr = basePtr;
+    u4 idx;
+
+    assert(gDvm.optimizing);
+    /* pool header first, then one u4 offset slot per class definition */
+    pClassPool = (RegisterMapClassPool*) ptr;
+    ptr += offsetof(RegisterMapClassPool, classDataOffset);
+    offsetTable = (u4*) ptr;
+    ptr += count * sizeof(u4);
+
+    pClassPool->numClasses = count;
+
+    /*
+     * Every class gets an offset slot, loaded or not.
+     */
+    for (idx = 0; idx < count; idx++) {
+        const DexClassDef* pClassDef;
+        const char* classDescriptor;
+        ClassObject* clazz;
+
+        pClassDef = dexGetClassDef(pDexFile, idx);
+        classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
+
+        /*
+         * All classes have been loaded into the bootstrap class loader.
+         * If we can find it, and it was successfully pre-verified, we
+         * run through its methods and add the register maps.
+         *
+         * If it wasn't pre-verified then we know it can't have any
+         * register maps.  Classes that can't be loaded or failed
+         * verification get an empty slot in the index.
+         */
+        clazz = NULL;
+        if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) != 0)
+            clazz = dvmLookupClass(classDescriptor, NULL, false);
+
+        if (clazz != NULL) {
+            offsetTable[idx] = ptr - basePtr;
+            LOGVV("%d -> offset %d (%p-%p)\n",
+                idx, offsetTable[idx], ptr, basePtr);
+
+            if (!writeMapsAllMethods(pDvmDex, clazz, &ptr,
+                    length - (ptr - basePtr)))
+            {
+                return 0;
+            }
+
+            ptr = align32(ptr);
+            LOGVV("Size %s (%d+%d methods): %d\n", clazz->descriptor,
+                clazz->directMethodCount, clazz->virtualMethodCount,
+                (ptr - basePtr) - offsetTable[idx]);
+        } else {
+            LOGV("%4d NOT mapadding '%s'\n", idx, classDescriptor);
+            assert(offsetTable[idx] == 0);
+        }
+    }
+
+    if (ptr - basePtr >= (int)length) {
+        /* a bit late: every write above has already happened by now */
+        LOGE("Buffer overrun\n");
+        dvmAbort();
+    }
+
+    return ptr - basePtr;
+}
+
+/*
+ * Generate a register map set for all verified classes in "pDvmDex".
+ *
+ * Returns NULL on failure.  Free with dvmFreeRegisterMapBuilder().
+ */
+RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex)
+{
+    RegisterMapBuilder* pBuilder;
+    pBuilder = (RegisterMapBuilder*) calloc(1, sizeof(RegisterMapBuilder));
+    if (pBuilder == NULL)
+        return NULL;
+
+    /*
+     * We have a couple of options here:
+     *  (1) Compute the size of the output, and malloc a buffer.
+     *  (2) Create a "large-enough" anonymous mmap region.
+     *
+     * The nice thing about option #2 is that we don't have to traverse
+     * all of the classes and methods twice.  The risk is that we might
+     * not make the region large enough.  Since the pages aren't mapped
+     * until used we can allocate a semi-absurd amount of memory without
+     * worrying about the effect on the rest of the system.
+     *
+     * The basic encoding on the largest jar file requires about 1MB of
+     * storage.  We map out 4MB here.  (TODO: guarantee that the last
+     * page of the mapping is marked invalid, so we reliably fail if
+     * we overrun.)
+     */
+    if (sysCreatePrivateMap(4 * 1024 * 1024, &pBuilder->memMap) != 0) {
+        free(pBuilder);
+        return NULL;
+    }
+
+    /* Create the maps; a return of 0 means the writer failed. */
+    size_t actual = writeMapsAllClasses(pDvmDex, (u1*)pBuilder->memMap.addr,
+                                        pBuilder->memMap.length);
+    if (actual == 0) {
+        dvmFreeRegisterMapBuilder(pBuilder);
+        return NULL;
+    }
+
+    /* "actual" is a size_t; cast so the argument matches the %d conversion */
+    LOGI("TOTAL size of register maps: %d\n", (int) actual);
+
+    pBuilder->data = pBuilder->memMap.addr;
+    pBuilder->size = actual;
+    return pBuilder;
+}
+
+/*
+ * Free the builder.
+ */
+void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder)
+{
+    /* a NULL builder is accepted and ignored */
+    if (pBuilder != NULL) {
+        sysReleaseShmem(&pBuilder->memMap);
+        free(pBuilder);
+    }
+}
+
+
+/*
+ * Find the data for the specified class.
+ *
+ * If there's no register map data, or none for this class, we return NULL.
+ */
+const void* dvmGetRegisterMapClassData(const DexFile* pDexFile, u4 classIdx,
+    u4* pNumMaps)
+{
+    const RegisterMapClassPool* pool =
+        (const RegisterMapClassPool*) pDexFile->pRegisterMapPool;
+    const RegisterMapMethodPool* methods;
+
+    if (pool == NULL)
+        return NULL;
+
+    if (classIdx >= pool->numClasses) {
+        LOGE("bad class index (%d vs %d)\n", classIdx, pool->numClasses);
+        dvmAbort();
+    }
+
+    /* a zero offset marks a class that has no map data */
+    u4 offset = pool->classDataOffset[classIdx];
+    if (offset == 0) {
+        LOGV("+++ no map for classIdx=%d\n", classIdx);
+        return NULL;
+    }
+
+    methods = (const RegisterMapMethodPool*) (((u1*) pool) + offset);
+    if (pNumMaps != NULL)
+        *pNumMaps = methods->methodCount;
+    return methods->methodData;
+}
+
+/*
+ * This advances "*pPtr" and returns its original value.
+ */
+const RegisterMap* dvmGetNextRegisterMap(const void** pPtr)
+{
+    const RegisterMap* cur = *pPtr;
+    u1* next = /*align32*/(((u1*) cur) + computeRegisterMapSize(cur));
+
+    *pPtr = next;       /* move the caller's cursor past this map */
+    LOGVV("getNext: %p -> %p (f=0x%x w=%d e=%d)\n", cur, *pPtr,
+        cur->format, cur->regWidth, dvmRegisterMapGetNumEntries(cur));
+    return cur;
+}
+
+
+/*
+ * ===========================================================================
+ *      Utility functions
+ * ===========================================================================
+ */
+
+/*
+ * Return the data for the specified address, or NULL if not found.
+ *
+ * The result must be released with dvmReleaseRegisterMapLine().
+ */
+const u1* dvmGetRegisterMapLine(const RegisterMap* pMap, int addr)
+{
+    int addrWidth, lineWidth;
+    u1 format = dvmGetRegisterMapFormat(pMap);
+    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
+
+    assert(numEntries > 0);
+
+    switch (format) {
+    case kRegMapFormatNone:
+        return NULL;
+    case kRegMapFormatCompact8:
+        addrWidth = 1;
+        break;
+    case kRegMapFormatCompact16:
+        addrWidth = 2;
+        break;
+    default:
+        LOGE("Unknown format %d\n", format);
+        dvmAbort();
+        return NULL;
+    }
+
+    lineWidth = addrWidth + pMap->regWidth;
+
+    /*
+     * Find the appropriate entry.  Small maps get a linear scan; larger
+     * ones a binary search, which relies on ascending address order.
+     */
+    static const int kSearchThreshold = 8;
+    const u1* data;
+    int lineAddr;
+
+    if (numEntries < kSearchThreshold) {
+        int i;
+        data = pMap->data;
+        for (i = numEntries; i > 0; i--) {
+            lineAddr = data[0];
+            if (addrWidth > 1)
+                lineAddr |= data[1] << 8;
+            if (lineAddr == addr)
+                return data + addrWidth;
+            data += lineWidth;
+        }
+        /* only the linear scan leaves "data" one past the last entry */
+        assert(data == pMap->data + lineWidth * numEntries);
+    } else {
+        int hi, lo, mid;
+
+        lo = 0;
+        hi = numEntries -1;
+
+        while (hi >= lo) {
+            mid = (hi + lo) / 2;
+            data = pMap->data + lineWidth * mid;
+
+            lineAddr = data[0];
+            if (addrWidth > 1)
+                lineAddr |= data[1] << 8;
+
+            if (addr > lineAddr) {
+                lo = mid + 1;
+            } else if (addr < lineAddr) {
+                hi = mid - 1;
+            } else {
+                return data + addrWidth;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+
+/*
+ * ===========================================================================
+ *      Map compression
+ * ===========================================================================
+ */
+
+/*
+Notes on map compression
+
+The idea is to create a compressed form that will be uncompressed before
+use, with the output possibly saved in a cache.  This means we can use an
+approach that is unsuited for random access if we choose.
+
+In the event that a map simply does not work with our compression scheme,
+it's reasonable to store the map without compression.  In the future we
+may want to have more than one compression scheme, and try each in turn,
+retaining the best.  (We certainly want to keep the uncompressed form if it
+turns out to be smaller or even slightly larger than the compressed form.)
+
+Each entry consists of an address and a bit vector.  Adjacent entries are
+strongly correlated, suggesting differential encoding.
+
+
+Ideally we would avoid outputting adjacent entries with identical
+bit vectors.  However, the register values at a given address do not
+imply anything about the set of valid registers at subsequent addresses.
+We therefore cannot omit an entry.
+
+  If the thread stack has a PC at an address without a corresponding
+  entry in the register map, we must conservatively scan the registers in
+  that thread.  This can happen when single-stepping in the debugger,
+  because the debugger is allowed to invoke arbitrary methods when
+  a thread is stopped at a breakpoint.  If we can guarantee that a GC
+  thread scan will never happen while the debugger has that thread stopped,
+  then we can lift this restriction and simply omit entries that don't
+  change the bit vector from its previous state.
+
+Each entry advances the address value by at least 1 (measured in 16-bit
+"code units").  Looking at the bootclasspath entries, advancing by 2 units
+is most common.  Advances by 1 unit are far less common than advances by
+2 units, but more common than 5, and things fall off rapidly.  Gaps of
+up to 220 code units appear in some computationally intensive bits of code,
+but are exceedingly rare.
+
+If we sum up the number of transitions in a couple of ranges in framework.jar:
+  [1,4]: 188998 of 218922 gaps (86.3%)
+  [1,7]: 211647 of 218922 gaps (96.7%)
+Using a 3-bit delta, with one value reserved as an escape code, should
+yield good results for the address.
+
+These results would change dramatically if we reduced the set of GC
+points by e.g. removing instructions like integer divide that are only
+present because they can throw and cause an allocation.
+
+We also need to include an "initial gap", because the first few instructions
+in a method may not be GC points.
+
+
+By observation, many entries simply repeat the previous bit vector, or
+change only one or two bits.  (This is with type-precise information;
+the rate of change of bits will be different if live-precise information
+is factored in).
+
+Looking again at adjacent entries in framework.jar:
+  0 bits changed: 63.0%
+  1 bit changed: 32.2%
+After that it falls off rapidly, e.g. the number of entries with 2 bits
+changed is usually less than 1/10th of the number of entries with 1 bit
+changed.  A solution that allows us to encode 0- or 1- bit changes
+efficiently will do well.
+
+We still need to handle cases where a large number of bits change.  We
+probably want a way to drop in a full copy of the bit vector when it's
+smaller than the representation of multiple bit changes.
+
+
+The bit-change information can be encoded as an index that tells the
+decoder to toggle the state.  We want to encode the index in as few bits
+as possible, but we need to allow for fairly wide vectors (e.g. we have a
+method with 175 registers).  We can deal with this in a couple of ways:
+(1) use an encoding that assumes few registers and has an escape code
+for larger numbers of registers; or (2) use different encodings based
+on how many total registers the method has.  The choice depends to some
+extent on whether methods with large numbers of registers tend to modify
+the first 16 regs more often than the others.
+
+The last N registers hold method arguments.  If the bytecode is expected
+to be examined in a debugger, "dx" ensures that the contents of these
+registers won't change.  Depending upon the encoding format, we may be
+able to take advantage of this.  We still have to encode the initial
+state, but we know we'll never have to output a bit change for the last
+N registers.
+
+Considering only methods with 16 or more registers, the "target octant"
+for register changes looks like this:
+  [ 43.1%, 16.4%, 6.5%, 6.2%, 7.4%, 8.8%, 9.7%, 1.8% ]
+As expected, there are fewer changes at the end of the list where the
+arguments are kept, and more changes at the start of the list because
+register values smaller than 16 can be used in compact Dalvik instructions
+and hence are favored for frequently-used values.  In general, the first
+octant is considerably more active than later entries, the last octant
+is much less active, and the rest are all about the same.
+
+Looking at all bit changes in all methods, 94% are to registers 0-15.  The
+encoding will benefit greatly by favoring the low-numbered registers.
+
+
+Some of the smaller methods have identical maps, and space could be
+saved by simply including a pointer to an earlier definition.  This would
+be best accomplished by specifying a "pointer" format value, followed by
+a 3-byte (or ULEB128) offset.  Implementing this would probably involve
+generating a hash value for each register map and maintaining a hash table.
+
+In some cases there are repeating patterns in the bit vector that aren't
+adjacent.  These could benefit from a dictionary encoding.  This doesn't
+really become useful until the methods reach a certain size though,
+and managing the dictionary may incur more overhead than we want.
+*/
+
+/*
+ * Gather gap and bit-change stats for one map (needs REGISTER_MAP_STATS).
+ */
+static void computeMapStats(RegisterMap* pMap, const Method* method)
+{
+#ifdef REGISTER_MAP_STATS
+    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
+    const u1 format = dvmGetRegisterMapFormat(pMap);
+    const u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
+    const u1* rawMap = pMap->data;
+    const u1* prevData = NULL;
+    int ent, addr, prevAddr = -1;
+
+    for (ent = 0; ent < numEntries; ent++) {
+        switch (format) {
+        case kRegMapFormatCompact8:
+            addr = *rawMap++;
+            break;
+        case kRegMapFormatCompact16:
+            addr = *rawMap++;
+            addr |= (*rawMap++) << 8;
+            break;
+        default:
+            /* shouldn't happen */
+            LOGE("GLITCH: bad format (%d)", format);
+            dvmAbort();
+        }
+        /* address bytes consumed; rawMap now sits on the entry's bit vector */
+        const u1* dataStart = rawMap;
+
+        pStats->totalGcPointCount++;
+
+        /*
+         * Gather "gap size" stats, i.e. the difference in addresses between
+         * successive GC points.
+         */
+        if (prevData != NULL) {
+            assert(prevAddr >= 0);
+            int addrDiff = addr - prevAddr;
+
+            if (addrDiff < 0) {
+                LOGE("GLITCH: address went backward (0x%04x->0x%04x, %s.%s)\n",
+                    prevAddr, addr, method->clazz->descriptor, method->name);
+            } else if (addrDiff > kMaxGcPointGap) {
+                LOGI("ARGH: addrDiff is %d, max %d (0x%04x->0x%04x, %s.%s)\n",
+                    addrDiff, kMaxGcPointGap, prevAddr, addr,
+                    method->clazz->descriptor, method->name);
+                /* not tallied in gcPointGap[], but still counted below */
+            } else {
+                pStats->gcPointGap[addrDiff]++;
+            }
+            pStats->gcGapCount++;
+
+
+            /*
+             * Compare bit vectors in adjacent entries.  We want to count
+             * up the number of bits that differ (to see if we frequently
+             * change 0 or 1 bits) and get a sense for which part of the
+             * vector changes the most often (near the start, middle, end).
+             *
+             * We only do the vector position quantization if we have at
+             * least 16 registers in the method.
+             */
+            int numDiff = 0;
+            float div = (float) kNumUpdatePosns / method->registersSize;
+            int regByte;
+            for (regByte = 0; regByte < pMap->regWidth; regByte++) {
+                int prev, cur, bit;
+
+                prev = prevData[regByte];
+                cur = dataStart[regByte];
+
+                for (bit = 0; bit < 8; bit++) {
+                    if (((prev >> bit) & 1) != ((cur >> bit) & 1)) {
+                        numDiff++;
+
+                        int bitNum = regByte * 8 + bit;
+
+                        if (bitNum < 16)
+                            pStats->updateLT16++;
+                        else
+                            pStats->updateGE16++;
+
+                        if (method->registersSize < 16)
+                            continue;
+
+                        if (bitNum >= method->registersSize) {
+                            /* stuff off the end should be zero in both */
+                            LOGE("WEIRD: bit=%d (%d/%d), prev=%02x cur=%02x\n",
+                                bit, regByte, method->registersSize,
+                                prev, cur);
+                            assert(false);
+                        }
+                        int idx = (int) (bitNum * div);
+                        if (!(idx >= 0 && idx < kNumUpdatePosns)) {
+                            LOGE("FAIL: bitNum=%d (of %d) div=%.3f idx=%d\n",
+                                bitNum, method->registersSize, div, idx);
+                            assert(false);
+                        }
+                        pStats->updatePosn[idx]++;
+                    }
+                }
+            }
+
+            if (numDiff > kMaxDiffBits) {
+                LOGW("ARGH: numDiff is %d, max %d\n", numDiff, kMaxDiffBits);
+            } else {
+                pStats->numDiffBits[numDiff]++;
+            }
+        }
+
+        /* advance to start of next line */
+        rawMap += pMap->regWidth;
+
+        prevAddr = addr;
+        prevData = dataStart;
+    }
+#endif
+}
+
+
+/*
+ * ===========================================================================
  *      Just-in-time generation
  * ===========================================================================
  */
diff --git a/vm/analysis/RegisterMap.h b/vm/analysis/RegisterMap.h
index 2a890e7..1009def 100644
--- a/vm/analysis/RegisterMap.h
+++ b/vm/analysis/RegisterMap.h
@@ -14,38 +14,195 @@
  * limitations under the License.
  */
 
-// ** UNDER CONSTRUCTION **
-
 /*
  * Declaration of register map data structure and related functions.
+ *
+ * These structures should be treated as opaque through most of the VM.
  */
 #ifndef _DALVIK_REGISTERMAP
 #define _DALVIK_REGISTERMAP
 
+#include "analysis/VerifySubs.h"
+#include "analysis/CodeVerify.h"
+
 /*
  * Format enumeration for RegisterMap data area.
  */
 typedef enum RegisterMapFormat {
-    kFormatUnknown = 0,
-    kFormatCompact8,        /* compact layout, 8-bit addresses */
-    kFormatCompact16,       /* compact layout, 16-bit addresses */
+    kRegMapFormatUnknown = 0,
+    kRegMapFormatNone,          /* indicates no map data follows */
+    kRegMapFormatCompact8,      /* compact layout, 8-bit addresses */
+    kRegMapFormatCompact16,     /* compact layout, 16-bit addresses */
     // TODO: compressed stream
+
+    kRegMapFormatOnHeap = 0x80, /* bit flag, indicates allocation on heap */
 } RegisterMapFormat;
 
 /*
  * This is a single variable-size structure.  It may be allocated on the
  * heap or mapped out of a (post-dexopt) DEX file.
+ *
+ * 32-bit alignment of the structure is NOT guaranteed.  This makes it a
+ * little awkward to deal with as a structure; to avoid accidents we use
+ * only byte types.  Multi-byte values are little-endian.
+ *
+ * Size of (format==FormatNone): 1 byte
+ * Size of (format==FormatCompact8): 4 + (1 + regWidth) * numEntries
+ * Size of (format==FormatCompact16): 4 + (2 + regWidth) * numEntries
  */
 struct RegisterMap {
     /* header */
-    u1      format;         /* enum RegisterMapFormat */
+    u1      format;         /* enum RegisterMapFormat; MUST be first entry */
     u1      regWidth;       /* bytes per register line, 1+ */
-    u2      numEntries;     /* number of entries */
+    u1      numEntries[2];  /* number of entries */
 
-    /* data starts here; no alignment guarantees made */
+    /* raw data starts here; need not be aligned */
     u1      data[1];
 };
 
+bool dvmRegisterMapStartup(void);
+void dvmRegisterMapShutdown(void);
+
+/*
+ * Get the format.
+ */
+INLINE u1 dvmGetRegisterMapFormat(const RegisterMap* pMap) {
+    return (u1) (pMap->format & ~kRegMapFormatOnHeap); /* drop heap flag */
+}
+
+/*
+ * Get the "on heap" flag.
+ */
+INLINE bool dvmGetRegisterMapOnHeap(const RegisterMap* pMap) {
+    return (pMap->format & kRegMapFormatOnHeap) == kRegMapFormatOnHeap;
+}
+
+/*
+ * Set the "on heap" flag.
+ */
+INLINE void dvmSetRegisterMapOnHeap(RegisterMap* pMap, bool val) {
+    /* strip the flag bit first, then put it back only when requested */
+    const u1 bare = pMap->format & ~(kRegMapFormatOnHeap);
+
+    pMap->format = val ? (u1) (bare | kRegMapFormatOnHeap) : bare;
+}
+
+/*
+ * Get the number of entries in this map.
+ */
+INLINE u2 dvmRegisterMapGetNumEntries(const RegisterMap* pMap) {
+    return (u2) ((pMap->numEntries[1] << 8) | pMap->numEntries[0]);
+}
+
+/*
+ * Set the number of entries in this map.
+ */
+INLINE void dvmRegisterMapSetNumEntries(RegisterMap* pMap, u2 numEntries) {
+    pMap->numEntries[1] = (u1) (numEntries >> 8);       /* high byte */
+    pMap->numEntries[0] = (u1) (numEntries & 0xff);     /* low byte */
+}
+
+/*
+ * Retrieve the bit vector for the specified address.  This is a pointer
+ * to the bit data from an uncompressed map, or to a temporary copy of
+ * data from a compressed map.
+ *
+ * The caller must call dvmReleaseRegisterMapLine() with the result.
+ *
+ * Returns NULL if not found.
+ */
+const u1* dvmGetRegisterMapLine(const RegisterMap* pMap, int addr);
+
+/*
+ * Release "data".
+ *
+ * If "pMap" points to a compressed map, this will free "data"; otherwise,
+ * it does nothing.
+ */
+INLINE void dvmReleaseRegisterMapLine(const RegisterMap* pMap, const u1* data)
+{}
+
+
+/*
+ * A pool of register maps for methods associated with a single class.
+ *
+ * Each entry is a 4-byte method index followed by the 32-bit-aligned
+ * RegisterMap.  The size of the RegisterMap is determined by parsing
+ * the map.  The lack of an index reduces random access speed, but we
+ * should be doing that rarely (during class load) and it saves space.
+ *
+ * These structures are 32-bit aligned.
+ */
+typedef struct RegisterMapMethodPool {
+    u2      methodCount;            /* chiefly used as a sanity check */
+
+    /* stream of per-method data starts here */
+    u4      methodData[1];
+} RegisterMapMethodPool;
+
+/*
+ * Header for the memory-mapped RegisterMap pool in the DEX file.
+ *
+ * The classDataOffset table provides offsets from the start of the
+ * RegisterMapPool structure.  There is one entry per class (including
+ * interfaces, which can have static initializers).
+ *
+ * The offset points to a RegisterMapMethodPool.
+ *
+ * These structures are 32-bit aligned.
+ */
+typedef struct RegisterMapClassPool {
+    u4      numClasses;
+
+    /* offset table starts here, 32-bit aligned; offset==0 means no data */
+    u4      classDataOffset[1];
+} RegisterMapClassPool;
+
+/*
+ * Find the register maps for this class.  (Used during class loading.)
+ * If "pNumMaps" is non-NULL, it will return the number of maps in the set.
+ *
+ * Returns NULL if none is available.
+ */
+const void* dvmGetRegisterMapClassData(const DexFile* pDexFile, u4 classIdx,
+    u4* pNumMaps);
+
+/*
+ * Get the register map for the next method.  "*pPtr" will be advanced past
+ * the end of the map.  (Used during class loading.)
+ *
+ * This should initially be called with the result from
+ * dvmGetRegisterMapClassData().
+ */
+const RegisterMap* dvmGetNextRegisterMap(const void** pPtr);
+
+/*
+ * This holds some meta-data while we construct the set of register maps
+ * for a DEX file.
+ *
+ * In particular, it keeps track of our temporary mmap region so we can
+ * free it later.
+ */
+typedef struct RegisterMapBuilder {
+    /* public */
+    void*       data;
+    size_t      size;
+
+    /* private */
+    MemMapping  memMap;
+} RegisterMapBuilder;
+
+/*
+ * Generate a register map set for all verified classes in "pDvmDex".
+ */
+RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex);
+
+/*
+ * Free the builder.
+ */
+void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder);
+
+
 /*
  * Generate the register map for a previously-verified method.
  *
@@ -97,4 +254,7 @@
  */
 RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata);
 
+/* dump stats gathered during register map creation process */
+void dvmRegisterMapDumpStats(void);
+
 #endif /*_DALVIK_REGISTERMAP*/
diff --git a/vm/analysis/VerifySubs.h b/vm/analysis/VerifySubs.h
index 4d5b57c..a87c6f1 100644
--- a/vm/analysis/VerifySubs.h
+++ b/vm/analysis/VerifySubs.h
@@ -57,7 +57,11 @@
 #define LOG_VFY_METH(_meth, ...)    dvmLogVerifyFailure(_meth, __VA_ARGS__)
 
 /* log verification failure with optional method info */
-void dvmLogVerifyFailure(const Method* meth, const char* format, ...);
+void dvmLogVerifyFailure(const Method* meth, const char* format, ...)
+#if defined(__GNUC__)
+    __attribute__ ((format(printf, 2, 3)))
+#endif
+    ;
 
 /* log verification failure due to resolution trouble */
 void dvmLogUnableToResolveClass(const char* missingClassDescr,
diff --git a/vm/hprof/Hprof.c b/vm/hprof/Hprof.c
index 66b46f4..2e6f7c9 100644
--- a/vm/hprof/Hprof.c
+++ b/vm/hprof/Hprof.c
@@ -111,7 +111,10 @@
     }
 }
 
-void
+/*
+ * Finish up the hprof dump.  Returns true on success.
+ */
+bool
 hprofShutdown(hprof_context_t *ctx)
 {
     FILE *tempFp = ctx->fp;
@@ -131,7 +134,7 @@
         fclose(tempFp);
         free(ctx->fileName);
         free(ctx);
-        return;
+        return false;
     }
     hprofContextInit(ctx, ctx->fileName, fp, true);
 
@@ -179,4 +182,5 @@
 
     /* throw out a log message for the benefit of "runhat" */
     LOGI("hprof: heap dump completed, temp file removed\n");
+    return true;
 }
diff --git a/vm/hprof/Hprof.h b/vm/hprof/Hprof.h
index e0e2d4b..696b0a7 100644
--- a/vm/hprof/Hprof.h
+++ b/vm/hprof/Hprof.h
@@ -235,7 +235,7 @@
  */
 
 hprof_context_t *hprofStartup(const char *outputFileName);
-void hprofShutdown(hprof_context_t *ctx);
+bool hprofShutdown(hprof_context_t *ctx);
 
 /*
  * Heap.c functions
@@ -244,7 +244,7 @@
  * the heap implementation; these functions require heap knowledge,
  * so they are implemented in Heap.c.
  */
-void hprofDumpHeap(const char* fileName);
+int hprofDumpHeap(const char* fileName);
 void dvmHeapSetHprofGcScanState(hprof_heap_tag_t state, u4 threadSerialNumber);
 
 #endif  // _DALVIK_HPROF_HPROF
diff --git a/vm/interp/Stack.c b/vm/interp/Stack.c
index 103d2b4..730b1a8 100644
--- a/vm/interp/Stack.c
+++ b/vm/interp/Stack.c
@@ -76,9 +76,10 @@
 
     if (stackPtr - stackReq < self->interpStackEnd) {
         /* not enough space */
-        LOGW("Stack overflow on call to interp (top=%p cur=%p size=%d %s.%s)\n",
-            self->interpStackStart, self->curFrame, self->interpStackSize,
-            method->clazz->descriptor, method->name);
+        LOGW("Stack overflow on call to interp "
+             "(req=%d top=%p cur=%p size=%d %s.%s)\n",
+            stackReq, self->interpStackStart, self->curFrame,
+            self->interpStackSize, method->clazz->descriptor, method->name);
         dvmHandleStackOverflow(self);
         assert(dvmCheckException(self));
         return false;
@@ -148,9 +149,10 @@
 
     if (stackPtr - stackReq < self->interpStackEnd) {
         /* not enough space */
-        LOGW("Stack overflow on call to native (top=%p cur=%p size=%d '%s')\n",
-            self->interpStackStart, self->curFrame, self->interpStackSize,
-            method->name);
+        LOGW("Stack overflow on call to native "
+             "(req=%d top=%p cur=%p size=%d '%s')\n",
+            stackReq, self->interpStackStart, self->curFrame,
+            self->interpStackSize, method->name);
         dvmHandleStackOverflow(self);
         assert(dvmCheckException(self));
         return false;
@@ -217,9 +219,10 @@
 
     if (stackPtr - stackReq < self->interpStackEnd) {
         /* not enough space; let JNI throw the exception */
-        LOGW("Stack overflow on PushLocal (top=%p cur=%p size=%d '%s')\n",
-            self->interpStackStart, self->curFrame, self->interpStackSize,
-            method->name);
+        LOGW("Stack overflow on PushLocal "
+             "(req=%d top=%p cur=%p size=%d '%s')\n",
+            stackReq, self->interpStackStart, self->curFrame,
+            self->interpStackSize, method->name);
         dvmHandleStackOverflow(self);
         assert(dvmCheckException(self));
         return false;
@@ -351,7 +354,8 @@
 
 #ifndef NDEBUG
     if (self->status != THREAD_RUNNING) {
-        LOGW("Status=%d on call to %s.%s -\n", self->status,
+        LOGW("threadid=%d: status=%d on call to %s.%s -\n",
+            self->threadId, self->status,
             method->clazz->descriptor, method->name);
     }
 #endif
@@ -504,11 +508,17 @@
     //dvmDumpThreadStack(dvmThreadSelf());
 
     if (dvmIsNativeMethod(method)) {
+#ifdef WITH_PROFILER
+        TRACE_METHOD_ENTER(self, method);
+#endif
         /*
          * Because we leave no space for local variables, "curFrame" points
          * directly at the method arguments.
          */
         (*method->nativeFunc)(self->curFrame, pResult, method, self);
+#ifdef WITH_PROFILER
+        TRACE_METHOD_EXIT(self, method);
+#endif
     } else {
         dvmInterpret(self, method, pResult);
     }
@@ -608,11 +618,17 @@
 #endif
 
     if (dvmIsNativeMethod(method)) {
+#ifdef WITH_PROFILER
+        TRACE_METHOD_ENTER(self, method);
+#endif
         /*
          * Because we leave no space for local variables, "curFrame" points
          * directly at the method arguments.
          */
         (*method->nativeFunc)(self->curFrame, pResult, method, self);
+#ifdef WITH_PROFILER
+        TRACE_METHOD_EXIT(self, method);
+#endif
     } else {
         dvmInterpret(self, method, pResult);
     }
@@ -712,11 +728,17 @@
     //dvmDumpThreadStack(dvmThreadSelf());
 
     if (dvmIsNativeMethod(method)) {
+#ifdef WITH_PROFILER
+        TRACE_METHOD_ENTER(self, method);
+#endif
         /*
          * Because we leave no space for local variables, "curFrame" points
          * directly at the method arguments.
          */
         (*method->nativeFunc)(self->curFrame, &retval, method, self);
+#ifdef WITH_PROFILER
+        TRACE_METHOD_EXIT(self, method);
+#endif
     } else {
         dvmInterpret(self, method, &retval);
     }
diff --git a/vm/mterp/armv5te/OP_MONITOR_ENTER.S b/vm/mterp/armv5te/OP_MONITOR_ENTER.S
index 6d4c2d8..524621a 100644
--- a/vm/mterp/armv5te/OP_MONITOR_ENTER.S
+++ b/vm/mterp/armv5te/OP_MONITOR_ENTER.S
@@ -8,9 +8,7 @@
     GET_VREG(r1, r2)                    @ r1<- vAA (object)
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
     cmp     r1, #0                      @ null object?
-#ifdef WITH_MONITOR_TRACKING
-    EXPORT_PC()                         @ export PC so we can grab stack trace
-#endif
+    EXPORT_PC()                         @ need for precise GC, MONITOR_TRACKING
     beq     common_errNullObject        @ null object, throw an exception
     FETCH_ADVANCE_INST(1)               @ advance rPC, load rINST
     bl      dvmLockObject               @ call(self, obj)
diff --git a/vm/mterp/armv5te/footer.S b/vm/mterp/armv5te/footer.S
index 8f7cc41..0e5898b 100644
--- a/vm/mterp/armv5te/footer.S
+++ b/vm/mterp/armv5te/footer.S
@@ -54,7 +54,7 @@
 #endif
 
     cmp     r3, #0                      @ suspend pending?
-    bne     2f                          @ yes, check suspend
+    bne     2f                          @ yes, do full suspension check
 
 #if defined(WITH_DEBUGGER) || defined(WITH_PROFILER)
 # if defined(WITH_DEBUGGER) && defined(WITH_PROFILER)
@@ -72,6 +72,7 @@
 
 2:  @ check suspend
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
+    EXPORT_PC()                         @ need for precise GC
     b       dvmCheckSuspendPending      @ suspend if necessary, then return
 
 3:  @ debugger/profiler enabled, bail out
diff --git a/vm/mterp/c/OP_MONITOR_ENTER.c b/vm/mterp/c/OP_MONITOR_ENTER.c
index 4d70da7..c9d8999 100644
--- a/vm/mterp/c/OP_MONITOR_ENTER.c
+++ b/vm/mterp/c/OP_MONITOR_ENTER.c
@@ -9,9 +9,7 @@
         if (!checkForNullExportPC(obj, fp, pc))
             GOTO_exceptionThrown();
         ILOGV("+ locking %p %s\n", obj, obj->clazz->descriptor);
-#ifdef WITH_MONITOR_TRACKING
-        EXPORT_PC();        /* need for stack trace */
-#endif
+        EXPORT_PC();    /* need for precise GC, also WITH_MONITOR_TRACKING */
         dvmLockObject(self, obj);
 #ifdef WITH_DEADLOCK_PREDICTION
         if (dvmCheckException(self))
diff --git a/vm/mterp/c/header.c b/vm/mterp/c/header.c
index e35ded4..d2fca9c 100644
--- a/vm/mterp/c/header.c
+++ b/vm/mterp/c/header.c
@@ -46,7 +46,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -86,6 +86,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -109,9 +121,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -296,6 +312,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
diff --git a/vm/mterp/cstubs/stubdefs.c b/vm/mterp/cstubs/stubdefs.c
index 1de6f0e..d4162c8 100644
--- a/vm/mterp/cstubs/stubdefs.c
+++ b/vm/mterp/cstubs/stubdefs.c
@@ -107,7 +107,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             glue->entryPoint = _entryPoint;                                 \
diff --git a/vm/mterp/out/InterpAsm-armv4.S b/vm/mterp/out/InterpAsm-armv4.S
index 3de7aea..f183814 100644
--- a/vm/mterp/out/InterpAsm-armv4.S
+++ b/vm/mterp/out/InterpAsm-armv4.S
@@ -818,9 +818,7 @@
     GET_VREG(r1, r2)                    @ r1<- vAA (object)
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
     cmp     r1, #0                      @ null object?
-#ifdef WITH_MONITOR_TRACKING
-    EXPORT_PC()                         @ export PC so we can grab stack trace
-#endif
+    EXPORT_PC()                         @ need for precise GC, MONITOR_TRACKING
     beq     common_errNullObject        @ null object, throw an exception
     FETCH_ADVANCE_INST(1)               @ advance rPC, load rINST
     bl      dvmLockObject               @ call(self, obj)
@@ -9342,7 +9340,7 @@
 #endif
 
     cmp     r3, #0                      @ suspend pending?
-    bne     2f                          @ yes, check suspend
+    bne     2f                          @ yes, do full suspension check
 
 #if defined(WITH_DEBUGGER) || defined(WITH_PROFILER)
 # if defined(WITH_DEBUGGER) && defined(WITH_PROFILER)
@@ -9360,6 +9358,7 @@
 
 2:  @ check suspend
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
+    EXPORT_PC()                         @ need for precise GC
     b       dvmCheckSuspendPending      @ suspend if necessary, then return
 
 3:  @ debugger/profiler enabled, bail out
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S
index 9987ff5..26c8860 100644
--- a/vm/mterp/out/InterpAsm-armv5te.S
+++ b/vm/mterp/out/InterpAsm-armv5te.S
@@ -818,9 +818,7 @@
     GET_VREG(r1, r2)                    @ r1<- vAA (object)
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
     cmp     r1, #0                      @ null object?
-#ifdef WITH_MONITOR_TRACKING
-    EXPORT_PC()                         @ export PC so we can grab stack trace
-#endif
+    EXPORT_PC()                         @ need for precise GC, MONITOR_TRACKING
     beq     common_errNullObject        @ null object, throw an exception
     FETCH_ADVANCE_INST(1)               @ advance rPC, load rINST
     bl      dvmLockObject               @ call(self, obj)
@@ -9336,7 +9334,7 @@
 #endif
 
     cmp     r3, #0                      @ suspend pending?
-    bne     2f                          @ yes, check suspend
+    bne     2f                          @ yes, do full suspension check
 
 #if defined(WITH_DEBUGGER) || defined(WITH_PROFILER)
 # if defined(WITH_DEBUGGER) && defined(WITH_PROFILER)
@@ -9354,6 +9352,7 @@
 
 2:  @ check suspend
     ldr     r0, [rGLUE, #offGlue_self]  @ r0<- glue->self
+    EXPORT_PC()                         @ need for precise GC
     b       dvmCheckSuspendPending      @ suspend if necessary, then return
 
 3:  @ debugger/profiler enabled, bail out
diff --git a/vm/mterp/out/InterpAsm-x86.S b/vm/mterp/out/InterpAsm-x86.S
index a80e59e..0fd3e42 100644
--- a/vm/mterp/out/InterpAsm-x86.S
+++ b/vm/mterp/out/InterpAsm-x86.S
@@ -682,9 +682,7 @@
     movl    offGlue_self(%ecx),%ecx     # ecx<- glue->self
     FETCH_INST_WORD(1)
     testl   %eax,%eax                   # null object?
-#ifdef WITH_MONITOR_TRACKING
-    EXPORT_PC()
-#endif
+    EXPORT_PC()                         # need for precise GC, MONITOR_TRACKING
     jne     .LOP_MONITOR_ENTER_continue
     jmp     common_errNullObject
 
@@ -8618,6 +8616,7 @@
      *      bool dvmCheckSuspendPending(Thread* self)
      *  Because we reached here via a call, go ahead and build a new frame.
      */
+    EXPORT_PC()                         # need for precise GC
     movl    offGlue_self(%ecx),%eax      # eax<- glue->self
     SPILL(rPC)                      # save edx
     push    %ebp
diff --git a/vm/mterp/out/InterpC-allstubs.c b/vm/mterp/out/InterpC-allstubs.c
index 635a873..cde7b27 100644
--- a/vm/mterp/out/InterpC-allstubs.c
+++ b/vm/mterp/out/InterpC-allstubs.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -513,7 +531,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             glue->entryPoint = _entryPoint;                                 \
@@ -1525,9 +1546,7 @@
         if (!checkForNullExportPC(obj, fp, pc))
             GOTO_exceptionThrown();
         ILOGV("+ locking %p %s\n", obj, obj->clazz->descriptor);
-#ifdef WITH_MONITOR_TRACKING
-        EXPORT_PC();        /* need for stack trace */
-#endif
+        EXPORT_PC();    /* need for precise GC, also WITH_MONITOR_TRACKING */
         dvmLockObject(self, obj);
 #ifdef WITH_DEADLOCK_PREDICTION
         if (dvmCheckException(self))
diff --git a/vm/mterp/out/InterpC-armv4.c b/vm/mterp/out/InterpC-armv4.c
index 2fcdcab..11acb39 100644
--- a/vm/mterp/out/InterpC-armv4.c
+++ b/vm/mterp/out/InterpC-armv4.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -513,7 +531,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             glue->entryPoint = _entryPoint;                                 \
diff --git a/vm/mterp/out/InterpC-armv5te.c b/vm/mterp/out/InterpC-armv5te.c
index 47c8709..c8f4ced 100644
--- a/vm/mterp/out/InterpC-armv5te.c
+++ b/vm/mterp/out/InterpC-armv5te.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -513,7 +531,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             glue->entryPoint = _entryPoint;                                 \
diff --git a/vm/mterp/out/InterpC-portdbg.c b/vm/mterp/out/InterpC-portdbg.c
index d527cc0..7f7dd8a 100644
--- a/vm/mterp/out/InterpC-portdbg.c
+++ b/vm/mterp/out/InterpC-portdbg.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -486,7 +504,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             interpState->entryPoint = _entryPoint;                          \
@@ -1869,9 +1890,7 @@
         if (!checkForNullExportPC(obj, fp, pc))
             GOTO_exceptionThrown();
         ILOGV("+ locking %p %s\n", obj, obj->clazz->descriptor);
-#ifdef WITH_MONITOR_TRACKING
-        EXPORT_PC();        /* need for stack trace */
-#endif
+        EXPORT_PC();    /* need for precise GC, also WITH_MONITOR_TRACKING */
         dvmLockObject(self, obj);
 #ifdef WITH_DEADLOCK_PREDICTION
         if (dvmCheckException(self))
diff --git a/vm/mterp/out/InterpC-portstd.c b/vm/mterp/out/InterpC-portstd.c
index 64e5ccd..367332c 100644
--- a/vm/mterp/out/InterpC-portstd.c
+++ b/vm/mterp/out/InterpC-portstd.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -485,7 +503,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             interpState->entryPoint = _entryPoint;                          \
@@ -1589,9 +1610,7 @@
         if (!checkForNullExportPC(obj, fp, pc))
             GOTO_exceptionThrown();
         ILOGV("+ locking %p %s\n", obj, obj->clazz->descriptor);
-#ifdef WITH_MONITOR_TRACKING
-        EXPORT_PC();        /* need for stack trace */
-#endif
+        EXPORT_PC();    /* need for precise GC, also WITH_MONITOR_TRACKING */
         dvmLockObject(self, obj);
 #ifdef WITH_DEADLOCK_PREDICTION
         if (dvmCheckException(self))
diff --git a/vm/mterp/out/InterpC-x86.c b/vm/mterp/out/InterpC-x86.c
index cd5fe95..ac524f4 100644
--- a/vm/mterp/out/InterpC-x86.c
+++ b/vm/mterp/out/InterpC-x86.c
@@ -53,7 +53,7 @@
  */
 #define THREADED_INTERP             /* threaded vs. while-loop interpreter */
 
-#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia */
+#ifdef WITH_INSTR_CHECKS            /* instruction-level paranoia (slow!) */
 # define CHECK_BRANCH_OFFSETS
 # define CHECK_REGISTER_INDICES
 #endif
@@ -93,6 +93,18 @@
 #endif
 
 /*
+ * Export another copy of the PC on every instruction; this is largely
+ * redundant with EXPORT_PC and the debugger code.  This value can be
+ * compared against what we have stored on the stack with EXPORT_PC to
+ * help ensure that we aren't missing any export calls.
+ */
+#if WITH_EXTRA_GC_CHECKS > 1
+# define EXPORT_EXTRA_PC() (self->currentPc2 = pc)
+#else
+# define EXPORT_EXTRA_PC()
+#endif
+
+/*
  * Adjust the program counter.  "_offset" is a signed int, in 16-bit units.
  *
  * Assumes the existence of "const u2* pc" and "const u2* curMethod->insns".
@@ -116,9 +128,13 @@
             dvmAbort();                                                     \
         }                                                                   \
         pc += myoff;                                                        \
+        EXPORT_EXTRA_PC();                                                  \
     } while (false)
 #else
-# define ADJUST_PC(_offset) (pc += _offset)
+# define ADJUST_PC(_offset) do {                                            \
+        pc += _offset;                                                      \
+        EXPORT_EXTRA_PC();                                                  \
+    } while (false)
 #endif
 
 /*
@@ -303,6 +319,8 @@
  * within the current method won't be shown correctly.  See the notes
  * in Exception.c.
  *
+ * This is also used to determine the address for precise GC.
+ *
  * Assumes existence of "u4* fp" and "const u2* pc".
  */
 #define EXPORT_PC()         (SAVEAREA_FROM_FP(fp)->xtra.currentPc = pc)
@@ -513,7 +531,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             glue->entryPoint = _entryPoint;                                 \
diff --git a/vm/mterp/portable/stubdefs.c b/vm/mterp/portable/stubdefs.c
index 0ea563c..b809caf 100644
--- a/vm/mterp/portable/stubdefs.c
+++ b/vm/mterp/portable/stubdefs.c
@@ -73,7 +73,10 @@
  * started.  If so, switch to a different "goto" table.
  */
 #define PERIODIC_CHECKS(_entryPoint, _pcadj) {                              \
-        dvmCheckSuspendQuick(self);                                         \
+        if (dvmCheckSuspendQuick(self)) {                                   \
+            EXPORT_PC();  /* need for precise GC */                         \
+            dvmCheckSuspendPending(self);                                   \
+        }                                                                   \
         if (NEED_INTERP_SWITCH(INTERP_TYPE)) {                              \
             ADJUST_PC(_pcadj);                                              \
             interpState->entryPoint = _entryPoint;                          \
diff --git a/vm/mterp/x86/OP_MONITOR_ENTER.S b/vm/mterp/x86/OP_MONITOR_ENTER.S
index 18425f4..548f71f 100644
--- a/vm/mterp/x86/OP_MONITOR_ENTER.S
+++ b/vm/mterp/x86/OP_MONITOR_ENTER.S
@@ -10,9 +10,7 @@
     movl    offGlue_self(%ecx),%ecx     # ecx<- glue->self
     FETCH_INST_WORD(1)
     testl   %eax,%eax                   # null object?
-#ifdef WITH_MONITOR_TRACKING
-    EXPORT_PC()
-#endif
+    EXPORT_PC()                         # need for precise GC, MONITOR_TRACKING
     jne     .L${opcode}_continue
     jmp     common_errNullObject
 %break
diff --git a/vm/mterp/x86/footer.S b/vm/mterp/x86/footer.S
index 50634dd..a3efc0e 100644
--- a/vm/mterp/x86/footer.S
+++ b/vm/mterp/x86/footer.S
@@ -106,6 +106,7 @@
      *      bool dvmCheckSuspendPending(Thread* self)
      *  Because we reached here via a call, go ahead and build a new frame.
      */
+    EXPORT_PC()                         # need for precise GC
     movl    offGlue_self(%ecx),%eax      # eax<- glue->self
     SPILL(rPC)                      # save edx
     push    %ebp
diff --git a/vm/native/dalvik_system_VMDebug.c b/vm/native/dalvik_system_VMDebug.c
index 9eccb76..ec6d92e 100644
--- a/vm/native/dalvik_system_VMDebug.c
+++ b/vm/native/dalvik_system_VMDebug.c
@@ -244,6 +244,23 @@
 }
 
 /*
+ * static boolean isMethodTracingActive()
+ *
+ * Report whether method tracing is active; always false without WITH_PROFILER.
+ */
+static void Dalvik_dalvik_system_VMDebug_isMethodTracingActive(const u4* args,
+    JValue* pResult)
+{
+    UNUSED_PARAMETER(args);
+
+#ifdef WITH_PROFILER
+    RETURN_BOOLEAN(dvmIsMethodTraceActive());
+#else
+    RETURN_BOOLEAN(false);
+#endif
+}
+
+/*
  * static void stopMethodTracing()
  *
  * Stop method tracing.
@@ -527,6 +544,7 @@
 #ifdef WITH_HPROF
     StringObject* fileNameStr = (StringObject*) args[0];
     char* fileName;
+    int result;
 
     if (fileNameStr == NULL) {
         dvmThrowException("Ljava/lang/NullPointerException;", NULL);
@@ -540,8 +558,15 @@
         RETURN_VOID();
     }
 
-    hprofDumpHeap(fileName);
+    result = hprofDumpHeap(fileName);
     free(fileName);
+
+    if (result != 0) {
+        /* ideally we'd throw something more specific based on actual failure */
+        dvmThrowException("Ljava/lang/RuntimeException;",
+            "Failure during heap dump -- check log output for details");
+        RETURN_VOID();
+    }
 #else
     dvmThrowException("Ljava/lang/UnsupportedOperationException;", NULL);
 #endif
@@ -562,6 +587,8 @@
         Dalvik_dalvik_system_VMDebug_stopAllocCounting },
     { "startMethodTracing",         "(Ljava/lang/String;II)V",
         Dalvik_dalvik_system_VMDebug_startMethodTracing },
+    { "isMethodTracingActive",      "()Z",
+        Dalvik_dalvik_system_VMDebug_isMethodTracingActive },
     { "stopMethodTracing",          "()V",
         Dalvik_dalvik_system_VMDebug_stopMethodTracing },
     { "startEmulatorTracing",       "()V",
diff --git a/vm/oo/Class.c b/vm/oo/Class.c
index c7ab763..6dd2e8c 100644
--- a/vm/oo/Class.c
+++ b/vm/oo/Class.c
@@ -1721,7 +1721,39 @@
         dvmLinearReadOnly(classLoader, newClass->ifields);
     }
 
-    /* load method definitions */
+    /*
+     * Load method definitions.  We do this in two batches, direct then
+     * virtual.
+     *
+     * If register maps have already been generated for this class, and
+     * precise GC is enabled, we pull out pointers to them.  We know that
+     * they were streamed to the DEX file in the same order in which the
+     * methods appear.
+     *
+     * If the class wasn't pre-verified, the maps will be generated when
+     * the class is verified during class initialization.
+     */
+    u4 classDefIdx = dexGetIndexForClassDef(pDexFile, pClassDef);
+    const void* classMapData;
+    u4 numMethods;
+
+    if (gDvm.preciseGc) {
+        classMapData =
+            dvmGetRegisterMapClassData(pDexFile, classDefIdx, &numMethods);
+
+        /* sanity check */
+        if (classMapData != NULL &&
+            pHeader->directMethodsSize + pHeader->virtualMethodsSize != numMethods)
+        {
+            LOGE("ERROR: in %s, direct=%d virtual=%d, maps have %d\n",
+                newClass->descriptor, pHeader->directMethodsSize,
+                pHeader->virtualMethodsSize, numMethods);
+            assert(false);
+            classMapData = NULL;        /* abandon */
+        }
+    } else {
+        classMapData = NULL;
+    }
 
     if (pHeader->directMethodsSize != 0) {
         int count = (int) pHeader->directMethodsSize;
@@ -1734,6 +1766,15 @@
         for (i = 0; i < count; i++) {
             dexReadClassDataMethod(&pEncodedData, &method, &lastIndex);
             loadMethodFromDex(newClass, &method, &newClass->directMethods[i]);
+            if (classMapData != NULL) {
+                const RegisterMap* pMap = dvmGetNextRegisterMap(&classMapData);
+                if (dvmGetRegisterMapFormat(pMap) != kRegMapFormatNone) {
+                    newClass->directMethods[i].registerMap = pMap;
+                    /* TODO: add rigorous checks */
+                    assert((newClass->directMethods[i].registersSize+7) / 8 ==
+                        newClass->directMethods[i].registerMap->regWidth);
+                }
+            }
         }
         dvmLinearReadOnly(classLoader, newClass->directMethods);
     }
@@ -1749,6 +1790,15 @@
         for (i = 0; i < count; i++) {
             dexReadClassDataMethod(&pEncodedData, &method, &lastIndex);
             loadMethodFromDex(newClass, &method, &newClass->virtualMethods[i]);
+            if (classMapData != NULL) {
+                const RegisterMap* pMap = dvmGetNextRegisterMap(&classMapData);
+                if (dvmGetRegisterMapFormat(pMap) != kRegMapFormatNone) {
+                    newClass->virtualMethods[i].registerMap = pMap;
+                    /* TODO: add rigorous checks */
+                    assert((newClass->virtualMethods[i].registersSize+7) / 8 ==
+                        newClass->virtualMethods[i].registerMap->regWidth);
+                }
+            }
         }
         dvmLinearReadOnly(classLoader, newClass->virtualMethods);
     }
@@ -1913,6 +1963,8 @@
 
 /*
  * Free anything in a Method that was allocated on the system heap.
+ *
+ * The containing class is largely torn down by this point.
  */
 static void freeMethodInnards(Method* meth)
 {
@@ -1920,26 +1972,31 @@
     free(meth->exceptions);
     free(meth->lines);
     free(meth->locals);
-#else
-    // TODO: call dvmFreeRegisterMap() if meth->registerMap was allocated
-    //       on the system heap
-    UNUSED_PARAMETER(meth);
 #endif
+
+    /*
+     * Some register maps are allocated on the heap, either because of late
+     * verification or because we're caching an uncompressed form.
+     */
+    const RegisterMap* pMap = meth->registerMap;
+    if (pMap != NULL && dvmGetRegisterMapOnHeap(pMap)) {
+        dvmFreeRegisterMap((RegisterMap*) pMap);
+        meth->registerMap = NULL;
+    }
 }
 
 /*
  * Clone a Method, making new copies of anything that will be freed up
- * by freeMethodInnards().
+ * by freeMethodInnards().  This is used for "miranda" methods.
  */
 static void cloneMethod(Method* dst, const Method* src)
 {
+    if (src->registerMap != NULL) {    /* memcpy below would alias the map */
+        LOGE("GLITCH: only expected abstract methods here\n");
+        LOGE("        cloning %s.%s\n", src->clazz->descriptor, src->name);
+        dvmAbort();
+    }
     memcpy(dst, src, sizeof(Method));
-#if 0
-    /* for current usage, these are never set, so no need to implement copy */
-    assert(dst->exceptions == NULL);
-    assert(dst->lines == NULL);
-    assert(dst->locals == NULL);
-#endif
 }
 
 /*
@@ -4270,9 +4327,9 @@
 
 /*
  * Add a RegisterMap to a Method.  This is done when we verify the class
- * and compute the register maps at class initialization time, which means
- * that "pMap" is on the heap and should be freed when the Method is
- * discarded.
+ * and compute the register maps at class initialization time (i.e. when
+ * we don't have a pre-generated map).  This means "pMap" is on the heap
+ * and should be freed when the Method is discarded.
  */
 void dvmSetRegisterMap(Method* method, const RegisterMap* pMap)
 {
@@ -4283,6 +4340,7 @@
             method->clazz->descriptor, method->name);
         /* keep going */
     }
+    assert(!dvmIsNativeMethod(method) && !dvmIsAbstractMethod(method));
 
     /* might be virtual or direct */
     dvmLinearReadWrite(clazz->classLoader, clazz->virtualMethods);
diff --git a/vm/test/AtomicSpeed.c b/vm/test/AtomicSpeed.c
new file mode 100644
index 0000000..e2ffbef
--- /dev/null
+++ b/vm/test/AtomicSpeed.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Atomic operation performance test.
+ */
+#include "Dalvik.h"
+
+//#define TRIVIAL_COMPARE     /* do something simple instead of an atomic op */
+
+/*
+ * Run repeatCount ops (asserted multiple of 10); return the elapsed time.
+ */
+u8 dvmTestAtomicSpeedSub(int repeatCount)
+{
+    static int value = 7;
+    int* valuePtr = &value;
+    u8 start, end;
+    int i;
+    
+#ifdef TRIVIAL_COMPARE
+    /* init to arg value so compiler can't pre-determine result */
+    int j = repeatCount;
+#endif
+
+    assert((repeatCount % 10) == 0);    /* loop body below is unrolled 10x */
+
+    start = dvmGetRelativeTimeNsec();
+
+    for (i = repeatCount / 10; i != 0; i--) {   /* 10 ops per pass */
+#ifdef TRIVIAL_COMPARE
+        // integer add (Dream: 3.4ns -- THUMB has 10 adds, ARM condenses)
+        j += i; j += i; j += i; j += i; j += i;
+        j += i; j += i; j += i; j += i; j += i;
+#else
+        // succeed 10x (Dream: 155.9ns)
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 7, 7);
+
+        // fail 10x (Dream: 158.5ns)
+        /*
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        ATOMIC_CMP_SWAP(valuePtr, 6, 7);
+        */
+#endif
+    }
+
+    end = dvmGetRelativeTimeNsec();
+
+#ifdef TRIVIAL_COMPARE
+    /* use value so compiler can't eliminate it */
+    dvmFprintf(stdout, "%d\n", j);
+#else
+    dvmFprintf(stdout, ".");            /* one progress dot per call */
+    fflush(stdout);     // not quite right if they intercepted fprintf
+#endif
+    return end - start;     /* delta of the two dvmGetRelativeTimeNsec() reads */
+}
+
+/*
+ * Control loop: times kIterations passes, prints ns/op for each; returns true.
+ */
+bool dvmTestAtomicSpeed(void)
+{
+    static const int kIterations = 10;
+    static const int kRepeatCount = 5 * 1000 * 1000;    /* ops per timed pass */
+    static const int kDelay = 500 * 1000;       /* usleep() arg between passes */
+    u8 results[kIterations];                    /* elapsed time of each pass */
+    int i;
+
+    for (i = 0; i < kIterations; i++) {
+        results[i] = dvmTestAtomicSpeedSub(kRepeatCount);
+        usleep(kDelay);
+    }
+
+    dvmFprintf(stdout, "\n");
+    dvmFprintf(stdout, "Atomic speed test results (%d per iteration):\n",
+        kRepeatCount);
+    for (i = 0; i < kIterations; i++) {
+        dvmFprintf(stdout,
+            " %2d: %.3fns\n", i, (double) results[i] / kRepeatCount);
+    }
+
+    return true;
+}
+
diff --git a/vm/test/Test.h b/vm/test/Test.h
index a6b54a5..ce47aae 100644
--- a/vm/test/Test.h
+++ b/vm/test/Test.h
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Internal unit tests.
  */
@@ -20,5 +21,6 @@
 #define _DALVIK_TEST_TEST
 
 bool dvmTestHash(void);
+bool dvmTestAtomicSpeed(void);
 
 #endif /*_DALVIK_TEST_TEST*/