Merge "Do not overwrite an input register in shift operations."
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 28bd754..a57b619 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -472,6 +472,7 @@
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
+  bool gcstress_ = false;
 };
 
 template <>
@@ -509,6 +510,10 @@
         xgc.verify_post_gc_rosalloc_ = true;
       } else if (gc_option == "nopostverify_rosalloc") {
         xgc.verify_post_gc_rosalloc_ = false;
+      } else if (gc_option == "gcstress") {
+        xgc.gcstress_ = true;
+      } else if (gc_option == "nogcstress") {
+        xgc.gcstress_ = false;
       } else if ((gc_option == "precise") ||
                  (gc_option == "noprecise") ||
                  (gc_option == "verifycardtable") ||
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 97703a5..2523a83 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -382,8 +382,8 @@
     Instruction::UNUSED_F5,
     Instruction::CREATE_LAMBDA,
     Instruction::UNUSED_F7,
-    Instruction::UNUSED_F8,
-    Instruction::UNUSED_F9,
+    Instruction::BOX_LAMBDA,
+    Instruction::UNBOX_LAMBDA,
     Instruction::UNUSED_FA,
     Instruction::UNUSED_FB,
     Instruction::UNUSED_FC,
@@ -425,6 +425,8 @@
 static const int kUnsupportedLambdaOpcodes[] = {
     Instruction::INVOKE_LAMBDA,
     Instruction::CREATE_LAMBDA,
+    Instruction::BOX_LAMBDA,
+    Instruction::UNBOX_LAMBDA,
 };
 
 // Unsupported opcodes. Null can be used when everything is supported. Size of the lists is
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 84b6a52..4cdf75b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1410,7 +1410,9 @@
       is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
     } else {
       is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
-                    heap->FindSpaceFromObject(method->GetDeclaringClass(), false)->IsImageSpace();
+                    heap->FindSpaceFromObject(method->GetDeclaringClass(), false)->IsImageSpace() &&
+                    !cl->IsQuickToInterpreterBridge(
+                        reinterpret_cast<const void*>(compiler_->GetEntryPointOf(method)));
     }
     if (!is_in_image) {
       // We can only branch directly to Methods that are resolved in the DexCache.
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2b85c7c..7d723ef 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -96,7 +96,7 @@
 class HGraphVisualizerDisassembler {
  public:
   HGraphVisualizerDisassembler(InstructionSet instruction_set, const uint8_t* base_address)
-      : instruction_set_(instruction_set) {
+      : instruction_set_(instruction_set), disassembler_(nullptr) {
     libart_disassembler_handle_ =
         dlopen(kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so", RTLD_NOW);
     if (libart_disassembler_handle_ == nullptr) {
@@ -128,6 +128,10 @@
   }
 
   void Disassemble(std::ostream& output, size_t start, size_t end) const {
+    if (disassembler_ == nullptr) {
+      return;
+    }
+
     const uint8_t* base = disassembler_->GetDisassemblerOptions()->base_address_;
     if (instruction_set_ == kThumb2) {
       // ARM and Thumb-2 use the same disassembler. The bottom bit of the
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index a048c85..6784098 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -222,9 +222,10 @@
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
   mirror::DexCache* dex_cache = cl->FindDexCache(info.GetDexFile());
   ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache);
-  DCHECK(field != nullptr);
-  mirror::Class* klass = field->GetType<false>();
-  SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+  if (field != nullptr) {
+    mirror::Class* klass = field->GetType<false>();
+    SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+  }
 }
 
 void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
@@ -323,9 +324,10 @@
   mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile());
   ArtMethod* method = dex_cache->GetResolvedMethod(
       instr->GetDexMethodIndex(), cl->GetImagePointerSize());
-  DCHECK(method != nullptr);
-  mirror::Class* klass = method->GetReturnType(false);
-  SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+  if (method != nullptr) {
+    mirror::Class* klass = method->GetReturnType(false);
+    SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+  }
 }
 
 void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index d21f551..0d842cc 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -16,6 +16,7 @@
 
 #include "barrier.h"
 
+#include "base/logging.h"
 #include "base/mutex.h"
 #include "base/time_utils.h"
 #include "thread.h"
@@ -87,7 +88,14 @@
 }
 
 Barrier::~Barrier() {
-  CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count";
+  if (gAborting == 0) {
+    // Only check when not aborting.
+    CHECK_EQ(count_, 0) << "Attempted to destroy barrier with non zero count";
+  } else {
+    if (count_ != 0) {
+      LOG(WARNING) << "Attempted to destroy barrier with non zero count " << count_;
+    }
+  }
 }
 
 }  // namespace art
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 8daf6d4..f2c8355 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -469,8 +469,6 @@
     }
     // Resize based on the minimum load factor.
     Resize(min_index);
-    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
-    elements_until_expand_ = NumBuckets() * max_load_factor_;
   }
 
   // Expand / shrink the table to the new specified size.
@@ -493,11 +491,18 @@
     if (owned_data) {
       allocfn_.deallocate(old_data, old_num_buckets);
     }
+
+    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
+    elements_until_expand_ = NumBuckets() * max_load_factor_;
   }
 
   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
+    DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
+    size_t non_empty_count = 0;
     while (!emptyfn_.IsEmpty(data_[index])) {
       index = NextIndex(index);
+      non_empty_count++;
+      DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
     }
     return index;
   }
@@ -526,7 +531,7 @@
   Pred pred_;  // Equals function.
   size_t num_elements_;  // Number of inserted elements.
   size_t num_buckets_;  // Number of hash table buckets.
-  size_t elements_until_expand_;  // Maxmimum number of elements until we expand the table.
+  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
   bool owns_data_;  // If we own data_ and are responsible for freeing it.
   T* data_;  // Backing storage.
   double min_load_factor_;
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index e88637f..fd9eb45 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -156,6 +156,38 @@
   }
 }
 
+TEST_F(HashSetTest, TestShrink) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
+  for (size_t i = 0; i < strings.size(); ++i) {
+    // Insert some strings into the beginning of our hash set to establish an initial size
+    hash_set.Insert(strings[i]);
+  }
+
+  hash_set.ShrinkToMaximumLoad();
+  const double initial_load = hash_set.CalculateLoadFactor();
+
+  // Insert a bunch of random strings to guarantee that we grow the capacity.
+  std::vector<std::string> random_strings;
+  static constexpr size_t count = 1000;
+  for (size_t i = 0; i < count; ++i) {
+    random_strings.push_back(RandomString(10));
+    hash_set.Insert(random_strings[i]);
+  }
+
+  // Erase all the extra strings, which guarantees that our load factor will be really bad.
+  for (size_t i = 0; i < count; ++i) {
+    hash_set.Erase(hash_set.Find(random_strings[i]));
+  }
+
+  const double bad_load = hash_set.CalculateLoadFactor();
+  EXPECT_GT(initial_load, bad_load);
+
+  // Shrink again; the load factor should return to its initial value.
+  hash_set.ShrinkToMaximumLoad();
+  EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
+}
+
 TEST_F(HashSetTest, TestStress) {
   HashSet<std::string, IsEmptyFnString> hash_set;
   std::unordered_multiset<std::string> std_set;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 0ab148e..aa91ca1 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -61,6 +61,7 @@
   kAbortLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
+  kTransactionLogLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
   kReferenceQueueFinalizerReferencesLock,
@@ -77,7 +78,6 @@
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
   kMarkSweepMarkStackLock,
-  kTransactionLogLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kDefaultMutexLevel,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 98fa897..3c1ab12 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1310,7 +1310,7 @@
 // reinit references to when reinitializing a ClassLinker from a
 // mapped image.
 void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
-  class_roots_.VisitRoot(visitor, RootInfo(kRootVMInternal));
+  class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   Thread* const self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
@@ -1333,9 +1333,9 @@
     }
   }
   VisitClassRoots(visitor, flags);
-  array_iftable_.VisitRoot(visitor, RootInfo(kRootVMInternal));
-  for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
-    find_array_class_cache_[i].VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
+  for (GcRoot<mirror::Class>& root : find_array_class_cache_) {
+    root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   }
 }
 
@@ -4928,8 +4928,7 @@
             }
           }
           if (miranda_method == nullptr) {
-            size_t size = ArtMethod::ObjectSize(image_pointer_size_);
-            miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(size));
+            miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
             CHECK(miranda_method != nullptr);
             // Point the interface table at a phantom slot.
             new(miranda_method) ArtMethod(*interface_method, image_pointer_size_);
@@ -4970,50 +4969,49 @@
     }
     StrideIterator<ArtMethod> out(
         reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size);
-    // Copy the mirada methods before making a copy of the vtable so that moving GC doesn't miss
-    // any roots. This is necessary since these miranda methods wont get their roots visited from
-    // the class table root visiting until they are copied to the new virtuals array.
-    const size_t old_vtable_count = vtable->GetLength();
-    const size_t new_vtable_count = old_vtable_count + miranda_methods.size();
-    size_t method_idx = old_vtable_count;
-    for (auto* mir_method : miranda_methods) {
-      ArtMethod* out_method = &*out;
-      // Leave the declaring class alone as type indices are relative to it
-      out_method->CopyFrom(mir_method, image_pointer_size_);
-      out_method->SetAccessFlags(out_method->GetAccessFlags() | kAccMiranda);
-      out_method->SetMethodIndex(0xFFFF & method_idx);
-      move_table.emplace(mir_method, out_method);
+    // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and
+    // we want the roots of the miranda methods to get visited.
+    for (ArtMethod* mir_method : miranda_methods) {
+      out->CopyFrom(mir_method, image_pointer_size_);
+      out->SetAccessFlags(out->GetAccessFlags() | kAccMiranda);
+      move_table.emplace(mir_method, &*out);
       ++out;
-      ++method_idx;
     }
-    DCHECK_EQ(new_vtable_count, method_idx);
     UpdateClassVirtualMethods(klass.Get(), virtuals, new_method_count);
-    // Done copying methods, they are all reachable from the class now, so we can end the no thread
+    // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
+
+    const size_t old_vtable_count = vtable->GetLength();
+    const size_t new_vtable_count = old_vtable_count + miranda_methods.size();
+    miranda_methods.clear();
     vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
     if (UNLIKELY(vtable.Get() == nullptr)) {
       self->AssertPendingOOMException();
       return false;
     }
+    out = StrideIterator<ArtMethod>(
+        reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size);
+    size_t vtable_pos = old_vtable_count;
+    for (size_t i = old_method_count; i < new_method_count; ++i) {
+      // Leave the declaring class alone as type indices are relative to it
+      out->SetMethodIndex(0xFFFF & vtable_pos);
+      vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
+      ++out;
+      ++vtable_pos;
+    }
+    CHECK_EQ(vtable_pos, new_vtable_count);
     // Update old vtable methods.
-    for (method_idx = 0; method_idx < old_vtable_count; ++method_idx) {
-      auto* m = vtable->GetElementPtrSize<ArtMethod*>(method_idx, image_pointer_size_);
+    for (size_t i = 0; i < old_vtable_count; ++i) {
+      auto* m = vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_);
       DCHECK(m != nullptr) << PrettyClass(klass.Get());
       auto it = move_table.find(m);
       if (it != move_table.end()) {
         auto* new_m = it->second;
         DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
-        vtable->SetElementPtrSize(method_idx, new_m, image_pointer_size_);
+        vtable->SetElementPtrSize(i, new_m, image_pointer_size_);
       }
     }
-    // Update miranda methods.
-    out = StrideIterator<ArtMethod>(
-        reinterpret_cast<uintptr_t>(virtuals) + old_method_count * method_size, method_size);
-    for (; method_idx < new_vtable_count; ++method_idx) {
-      vtable->SetElementPtrSize(method_idx, &*out, image_pointer_size_);
-      ++out;
-    }
 
     klass->SetVTable(vtable.Get());
     // Go fix up all the stale miranda pointers.
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 5918c10..b465985 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -54,10 +54,6 @@
 #include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
 
-#ifdef HAVE_ANDROID_OS
-#include "cutils/properties.h"
-#endif
-
 namespace art {
 
 // The key identifying the debugger to update instrumentation.
@@ -65,22 +61,7 @@
 
 // Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol.
 static uint16_t CappedAllocRecordCount(size_t alloc_record_count) {
-  size_t cap = 0xffff;
-#ifdef HAVE_ANDROID_OS
-  // Check whether there's a system property overriding the number of recent records.
-  const char* propertyName = "dalvik.vm.recentAllocMax";
-  char recentAllocMaxString[PROPERTY_VALUE_MAX];
-  if (property_get(propertyName, recentAllocMaxString, "") > 0) {
-    char* end;
-    size_t value = strtoul(recentAllocMaxString, &end, 10);
-    if (*end != '\0') {
-      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
-                 << "' --- invalid";
-    } else {
-      cap = value;
-    }
-  }
-#endif
+  const size_t cap = 0xffff;
   if (alloc_record_count > cap) {
     return cap;
   }
@@ -4725,7 +4706,7 @@
   gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords();
   CHECK(records != nullptr);
 
-  const uint16_t capped_count = CappedAllocRecordCount(records->Size());
+  const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize());
   uint16_t count = capped_count;
 
   LOG(INFO) << "Tracked allocations, (count=" << count << ")";
@@ -4863,7 +4844,7 @@
     StringTable method_names;
     StringTable filenames;
 
-    const uint16_t capped_count = CappedAllocRecordCount(records->Size());
+    const uint16_t capped_count = CappedAllocRecordCount(records->GetRecentAllocationSize());
     uint16_t count = capped_count;
     for (auto it = records->RBegin(), end = records->REnd();
          count > 0 && it != end; count--, it++) {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 41c2417..803d58d 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -267,8 +267,8 @@
   /* TODO(iam): get rid of the unused 'false' column */ \
   V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
   V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kUnknown, 0, kVerifyError) \
-  V(0xF8, UNUSED_F8, "unused-f8", k10x, false, kUnknown, 0, kVerifyError) \
-  V(0xF9, UNUSED_F9, "unused-f9", k10x, false, kUnknown, 0, kVerifyError) \
+  V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \
+  V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \
   V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xFB, UNUSED_FB, "unused-fb", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xFC, UNUSED_FC, "unused-fc", k10x, false, kUnknown, 0, kVerifyError) \
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index a385363..ac7de63 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -45,10 +45,29 @@
                  << "' --- invalid";
     } else {
       alloc_record_max_ = value;
+      if (recent_record_max_ > value) {
+        recent_record_max_ = value;
+      }
+    }
+  }
+  // Check whether there's a system property overriding the number of recent records.
+  propertyName = "dalvik.vm.recentAllocMax";
+  char recentAllocMaxString[PROPERTY_VALUE_MAX];
+  if (property_get(propertyName, recentAllocMaxString, "") > 0) {
+    char* end;
+    size_t value = strtoul(recentAllocMaxString, &end, 10);
+    if (*end != '\0') {
+      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
+                 << "' --- invalid";
+    } else if (value > alloc_record_max_) {
+      LOG(ERROR) << "Ignoring  " << propertyName << " '" << recentAllocMaxString
+                 << "' --- should be less than " << alloc_record_max_;
+    } else {
+      recent_record_max_ = value;
     }
   }
   // Check whether there's a system property overriding the max depth of stack trace.
-  propertyName = "dalvik.vm.allocStackDepth";
+  propertyName = "debug.allocTracker.stackDepth";
   char stackDepthString[PROPERTY_VALUE_MAX];
   if (property_get(propertyName, stackDepthString, "") > 0) {
     char* end;
@@ -56,6 +75,10 @@
     if (*end != '\0') {
       LOG(ERROR) << "Ignoring  " << propertyName << " '" << stackDepthString
                  << "' --- invalid";
+    } else if (value > kMaxSupportedStackDepth) {
+      LOG(WARNING) << propertyName << " '" << stackDepthString << "' too large, using "
+                   << kMaxSupportedStackDepth;
+      max_stack_depth_ = kMaxSupportedStackDepth;
     } else {
       max_stack_depth_ = value;
     }
@@ -67,6 +90,20 @@
   STLDeleteValues(&entries_);
 }
 
+void AllocRecordObjectMap::VisitRoots(RootVisitor* visitor) {
+  CHECK_LE(recent_record_max_, alloc_record_max_);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, RootInfo(kRootDebugger));
+  size_t count = recent_record_max_;
+  // Only visit the last recent_record_max_ number of objects in entries_.
+  // They need to be retained for DDMS's recent allocation tracking.
+  // TODO: This will cause 098-ddmc test to run out of memory for GC stress test.
+  // There should be an option that does not keep these objects live if allocation tracking is only
+  // for the purpose of an HPROF dump. b/20037135
+  for (auto it = entries_.rbegin(), end = entries_.rend(); count > 0 && it != end; count--, ++it) {
+    buffered_visitor.VisitRoot(it->first);
+  }
+}
+
 void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) {
   VLOG(heap) << "Start SweepAllocationRecords()";
   size_t count_deleted = 0, count_moved = 0;
@@ -139,6 +176,7 @@
       if (self_name == "JDWP") {
         records->alloc_ddm_thread_id_ = self->GetTid();
       }
+      records->scratch_trace_.SetDepth(records->max_stack_depth_);
       size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ +
                   sizeof(AllocRecord) + sizeof(AllocRecordStackTrace);
       LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of "
@@ -181,19 +219,14 @@
 
   DCHECK_LE(records->Size(), records->alloc_record_max_);
 
-  // Remove oldest record.
-  if (records->Size() == records->alloc_record_max_) {
-    records->RemoveOldest();
-  }
-
   // Get stack trace.
-  const size_t max_depth = records->max_stack_depth_;
-  AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth);
-  // add scope to make "visitor" destroyed promptly, in order to set the trace->depth_
+  // Add a scope so that "visitor" is destroyed promptly, in order to set scratch_trace_.depth_.
   {
-    AllocRecordStackVisitor visitor(self, trace, max_depth);
+    AllocRecordStackVisitor visitor(self, &records->scratch_trace_, records->max_stack_depth_);
     visitor.WalkStack();
   }
+  records->scratch_trace_.SetTid(self->GetTid());
+  AllocRecordStackTrace* trace = new AllocRecordStackTrace(records->scratch_trace_);
 
   // Fill in the basics.
   AllocRecord* record = new AllocRecord(byte_count, trace);
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 45b3406..5214b6b 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -71,8 +71,15 @@
  public:
   static constexpr size_t kHashMultiplier = 17;
 
-  AllocRecordStackTrace(pid_t tid, size_t max_depth)
-      : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+  explicit AllocRecordStackTrace(size_t max_depth)
+      : tid_(0), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {}
+
+  AllocRecordStackTrace(const AllocRecordStackTrace& r)
+      : tid_(r.tid_), depth_(r.depth_), stack_(new AllocRecordStackTraceElement[r.depth_]) {
+    for (size_t i = 0; i < depth_; ++i) {
+      stack_[i] = r.stack_[i];
+    }
+  }
 
   ~AllocRecordStackTrace() {
     delete[] stack_;
@@ -82,6 +89,10 @@
     return tid_;
   }
 
+  void SetTid(pid_t t) {
+    tid_ = t;
+  }
+
   size_t GetDepth() const {
     return depth_;
   }
@@ -102,6 +113,7 @@
 
   bool operator==(const AllocRecordStackTrace& other) const {
     if (this == &other) return true;
+    if (tid_ != other.tid_) return false;
     if (depth_ != other.depth_) return false;
     for (size_t i = 0; i < depth_; ++i) {
       if (!(stack_[i] == other.stack_[i])) return false;
@@ -110,7 +122,7 @@
   }
 
  private:
-  const pid_t tid_;
+  pid_t tid_;
   size_t depth_;
   AllocRecordStackTraceElement* const stack_;
 };
@@ -200,7 +212,9 @@
 
   AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_)
       : alloc_record_max_(kDefaultNumAllocRecords),
+        recent_record_max_(kDefaultNumRecentRecords),
         max_stack_depth_(kDefaultAllocStackDepth),
+        scratch_trace_(kMaxSupportedStackDepth),
         alloc_ddm_thread_id_(0) {}
 
   ~AllocRecordObjectMap();
@@ -208,6 +222,10 @@
   void Put(mirror::Object* obj, AllocRecord* record)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    if (entries_.size() == alloc_record_max_) {
+      delete entries_.front().second;
+      entries_.pop_front();
+    }
     entries_.emplace_back(GcRoot<mirror::Object>(obj), record);
   }
 
@@ -215,17 +233,19 @@
     return entries_.size();
   }
 
-  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
+  size_t GetRecentAllocationSize() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
+    CHECK_LE(recent_record_max_, alloc_record_max_);
+    size_t sz = entries_.size();
+    return std::min(recent_record_max_, sz);
+  }
+
+  void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
-  void RemoveOldest()
+  void SweepAllocationRecords(IsMarkedCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) {
-    DCHECK(!entries_.empty());
-    delete entries_.front().second;
-    entries_.pop_front();
-  }
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
   // TODO: Is there a better way to hide the entries_'s type?
   EntryList::iterator Begin()
@@ -254,12 +274,13 @@
 
  private:
   static constexpr size_t kDefaultNumAllocRecords = 512 * 1024;
-  static constexpr size_t kDefaultAllocStackDepth = 4;
+  static constexpr size_t kDefaultNumRecentRecords = 64 * 1024 - 1;
+  static constexpr size_t kDefaultAllocStackDepth = 16;
+  static constexpr size_t kMaxSupportedStackDepth = 128;
   size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
-  // The implementation always allocates max_stack_depth_ number of frames for each stack trace.
-  // As long as the max depth is not very large, this is not a waste of memory since most stack
-  // traces will fill up the max depth number of the frames.
+  size_t recent_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_);
   size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_);
+  AllocRecordStackTrace scratch_trace_ GUARDED_BY(Locks::alloc_tracker_lock_);
   pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_);
   EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_);
 
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 6984c16..c7d2e9f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -412,6 +412,11 @@
     // the mark stack here once again.
     ProcessMarkStack();
     CheckEmptyMarkQueue();
+    if (kVerboseMode) {
+      LOG(INFO) << "AllowNewSystemWeaks";
+    }
+    Runtime::Current()->AllowNewSystemWeaks();
+    IssueEmptyCheckpoint();
     // Disable marking.
     if (kUseTableLookupReadBarrier) {
       heap_->rb_table_->ClearAll();
@@ -419,10 +424,6 @@
     }
     is_mark_queue_push_disallowed_.StoreSequentiallyConsistent(1);
     is_marking_ = false;
-    if (kVerboseMode) {
-      LOG(INFO) << "AllowNewSystemWeaks";
-    }
-    Runtime::Current()->AllowNewSystemWeaks();
     CheckEmptyMarkQueue();
   }
 
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index ee4568e..0ed3b6d 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -175,6 +175,13 @@
   } else {
     DCHECK(!IsAllocTrackingEnabled());
   }
+  if (kInstrumented) {
+    if (gc_stress_mode_) {
+      CheckGcStressMode(self, &obj);
+    }
+  } else {
+    DCHECK(!gc_stress_mode_);
+  }
   // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for
   // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
   // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since
@@ -392,7 +399,7 @@
   // Zygote resulting in it being prematurely freed.
   // We can only do this for primitive objects since large objects will not be within the card table
   // range. This also means that we rely on SetClass not dirtying the object's card.
-  return byte_count >= large_object_threshold_ && c->IsPrimitiveArray();
+  return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
 }
 
 template <bool kGrow>
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 3c020e2..f0ba0bd 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -21,6 +21,7 @@
 
 #include <limits>
 #include <memory>
+#include <unwind.h>  // For GC verification.
 #include <vector>
 
 #include "art_field-inl.h"
@@ -125,7 +126,8 @@
            bool ignore_max_footprint, bool use_tlab,
            bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
            bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-           bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction_for_oom,
+           bool verify_post_gc_rosalloc, bool gc_stress_mode,
+           bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
@@ -170,6 +172,7 @@
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
       verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
+      gc_stress_mode_(gc_stress_mode),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
        * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
        * verification is enabled, we limit the size of allocation stacks to speed up their
@@ -210,13 +213,17 @@
       gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount),
       blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U,
                                         kGcCountRateMaxBucketCount),
-      alloc_tracking_enabled_(false) {
+      alloc_tracking_enabled_(false),
+      backtrace_lock_(nullptr),
+      seen_backtrace_count_(0u),
+      unique_backtrace_count_(0u) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
-  const bool is_zygote = Runtime::Current()->IsZygote();
+  const bool is_zygote = runtime->IsZygote();
   if (!is_zygote) {
     // Background compaction is currently not supported for command line runs.
     if (background_collector_type_ != foreground_collector_type_) {
@@ -508,8 +515,12 @@
       LOG(FATAL) << "There's a gap between the image space and the non-moving space";
     }
   }
-  if (running_on_valgrind_) {
-    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+  instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation();
+  if (gc_stress_mode_) {
+    backtrace_lock_ = new Mutex("GC complete lock");
+  }
+  if (running_on_valgrind_ || gc_stress_mode_) {
+    instrumentation->InstrumentQuickAllocEntryPoints();
   }
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
@@ -1074,6 +1085,12 @@
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
   delete pending_task_lock_;
+  delete backtrace_lock_;
+  if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
+    LOG(INFO) << "gc stress unique=" << unique_backtrace_count_.LoadRelaxed()
+        << " total=" << seen_backtrace_count_.LoadRelaxed() +
+            unique_backtrace_count_.LoadRelaxed();
+  }
   VLOG(heap) << "Finished ~Heap()";
 }
 
@@ -3681,6 +3698,15 @@
   allocation_records_.reset(records);
 }
 
+void Heap::VisitAllocationRecords(RootVisitor* visitor) const {
+  if (IsAllocTrackingEnabled()) {
+    MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
+    if (IsAllocTrackingEnabled()) {
+      GetAllocationRecords()->VisitRoots(visitor);
+    }
+  }
+}
+
 void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const {
   if (IsAllocTrackingEnabled()) {
     MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_);
@@ -3690,5 +3716,73 @@
   }
 }
 
+// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
+class StackCrawlState {
+ public:
+  StackCrawlState(uintptr_t* frames, size_t max_depth, size_t skip_count)
+      : frames_(frames), frame_count_(0), max_depth_(max_depth), skip_count_(skip_count) {
+  }
+  size_t GetFrameCount() const {
+    return frame_count_;
+  }
+  static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) {
+    auto* const state = reinterpret_cast<StackCrawlState*>(arg);
+    const uintptr_t ip = _Unwind_GetIP(context);
+    // Skip the first skip_count_ frames with a non-zero ip; get_backtrace passes 0, so none.
+    if (ip != 0 && state->skip_count_ > 0) {
+      --state->skip_count_;
+      return _URC_NO_REASON;
+    }
+    // ip may be off for ARM but it shouldn't matter since we only use it for hashing.
+    state->frames_[state->frame_count_] = ip;
+    state->frame_count_++;
+    return state->frame_count_ >= state->max_depth_ ? _URC_END_OF_STACK : _URC_NO_REASON;
+  }
+
+ private:
+  uintptr_t* const frames_;
+  size_t frame_count_;
+  const size_t max_depth_;
+  size_t skip_count_;
+};
+
+static size_t get_backtrace(uintptr_t* frames, size_t max_depth) {
+  StackCrawlState crawl_state(frames, max_depth, /* skip_count */ 0u);
+  _Unwind_Backtrace(&StackCrawlState::Callback, &crawl_state);
+  return crawl_state.GetFrameCount();
+}
+
+void Heap::CheckGcStressMode(Thread* self, mirror::Object** obj) {
+  auto* const runtime = Runtime::Current();
+  if (gc_stress_mode_ && runtime->GetClassLinker()->IsInitialized() &&
+      !runtime->IsActiveTransaction() && mirror::Class::HasJavaLangClass()) {
+    // Hash the current native backtrace and GC only the first time that hash is seen.
+    bool new_backtrace = false;
+    {
+      static constexpr size_t kMaxFrames = 16u;
+      uintptr_t backtrace[kMaxFrames];
+      const size_t frames = get_backtrace(backtrace, kMaxFrames);
+      uint64_t hash = 0;
+      for (size_t i = 0; i < frames; ++i) {
+        hash = hash * 2654435761 + backtrace[i];
+        hash += (hash >> 13) ^ (hash << 6);
+      }
+      MutexLock mu(self, *backtrace_lock_);
+      // insert() reports whether the hash was newly added, so one lookup both tests for and
+      // records the backtrace while backtrace_lock_ is held.
+      new_backtrace = seen_backtraces_.insert(hash).second;
+      // mu's scope ends here, before any GC is started.
+    }
+    if (new_backtrace) {
+      StackHandleScope<1> hs(self);
+      auto h = hs.NewHandleWrapper(obj);
+      CollectGarbage(false);
+      unique_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+    } else {
+      seen_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+    }
+  }
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 18244c8..54a3235 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -19,6 +19,7 @@
 
 #include <iosfwd>
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "allocator_type.h"
@@ -181,7 +182,8 @@
                 bool ignore_max_footprint, bool use_tlab,
                 bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
                 bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-                bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction,
+                bool verify_post_gc_rosalloc, bool gc_stress_mode,
+                bool use_homogeneous_space_compaction,
                 uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
   ~Heap();
@@ -703,6 +705,9 @@
   void SetAllocationRecords(AllocRecordObjectMap* records)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_);
 
+  void VisitAllocationRecords(RootVisitor* visitor) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -909,6 +914,10 @@
 
   void UpdateGcCountRateHistograms() EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
+  // GC stress mode attempts to do one GC per unique backtrace.
+  void CheckGcStressMode(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
 
@@ -1064,6 +1073,7 @@
   bool verify_pre_gc_rosalloc_;
   bool verify_pre_sweeping_rosalloc_;
   bool verify_post_gc_rosalloc_;
+  const bool gc_stress_mode_;
 
   // RAII that temporarily disables the rosalloc verification during
   // the zygote fork.
@@ -1219,6 +1229,14 @@
   std::unique_ptr<AllocRecordObjectMap> allocation_records_
       GUARDED_BY(Locks::alloc_tracker_lock_);
 
+  // GC stress related data structures.
+  Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Debugging variables, seen backtraces vs unique backtraces.
+  Atomic<uint64_t> seen_backtrace_count_;
+  Atomic<uint64_t> unique_backtrace_count_;
+  // Stack trace hashes that we already saw.
+  std::unordered_set<uint64_t> seen_backtraces_ GUARDED_BY(backtrace_lock_);
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index d6146f3..bb604f0 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -46,7 +46,9 @@
   kRootMonitorUsed,
   kRootThreadObject,
   kRootInternedString,
+  kRootFinalizing,  // used for HPROF's conversion to HprofHeapTag
   kRootDebugger,
+  kRootReferenceCleanup,  // used for HPROF's conversion to HprofHeapTag
   kRootVMInternal,
   kRootJNIMonitor,
 };
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index b21103b..a12a58d 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -35,6 +35,7 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
+#include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
@@ -131,6 +132,23 @@
   return success;
 }
 
+// Write out the 'ArtMethod*' into vreg and vreg+1
+static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame,
+                                               const ArtMethod& called_method,
+                                               uint32_t vreg) {
+  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
+  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method));
+  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method)
+                                                    >> BitSizeOf<uint32_t>());
+  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+
+  DCHECK_NE(called_method_lo | called_method_hi, 0u);
+
+  shadow_frame.SetVReg(vreg, called_method_lo);
+  shadow_frame.SetVReg(vreg + 1, called_method_hi);
+}
+
 // Handles create-lambda instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 // (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
@@ -161,20 +179,43 @@
     return false;
   }
 
-  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
-  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(called_method));
-  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(called_method)
-                                                    >> BitSizeOf<uint32_t>());
-  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-
-  DCHECK_NE(called_method_lo | called_method_hi, 0u);
-
-  shadow_frame.SetVReg(vregA, called_method_lo);
-  shadow_frame.SetVReg(vregA + 1, called_method_hi);
+  WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA);
   return true;
 }
 
+// Reads out the 'ArtMethod*' stored inside of vreg and vreg+1
+//
+// Validates that the art method points to a valid lambda function, otherwise throws
+// an exception and returns null.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame,
+                                                           uint32_t vreg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
+  // instead of just an ArtMethod.
+  // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit,
+  // but once proper variable capture is implemented it will only use 1 vreg.
+  uint32_t vc_value_lo = shadow_frame.GetVReg(vreg);
+  uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1);
+
+  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
+                           | vc_value_lo;
+
+  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
+
+  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
+  if (UNLIKELY(called_method == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return nullptr;
+  } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+    return nullptr;
+  }
+
+  return called_method;
+}
+
 template<bool do_access_check>
 static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
                                   uint16_t inst_data, JValue* result) {
@@ -188,34 +229,18 @@
    * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB)
    */
   uint32_t vC = inst->VRegC_25x();
+  ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC);
 
-  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
-  // instead of just an ArtMethod. We also should only need to use 1 register instead of 2.
-  uint32_t vc_value_lo = shadow_frame.GetVReg(vC);
-  uint32_t vc_value_hi = shadow_frame.GetVReg(vC + 1);
-
-  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
-                           | vc_value_lo;
-
-  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
-  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
-  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
-
-  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
+  // Failed lambda target runtime check, an exception was raised.
   if (UNLIKELY(called_method == nullptr)) {
-    ThrowNullPointerExceptionFromInterpreter();
-    result->SetJ(0);
-    return false;
-  }
-
-  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else {
-    return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
-                                                result);
   }
+
+  // Invoke a non-range lambda
+  return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
+                                              result);
 }
 
 // Handles invoke-XXX/range instructions.
@@ -469,6 +494,89 @@
   return 3;
 }
 
+template <bool _do_check>
+static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                               uint16_t inst_data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  /*
+   * box-lambda vA, vB /// opcode 0xf8, format 22x
+   * - vA is the target register where the Object representation of the closure will be stored into
+   * - vB is a closure (made by create-lambda)
+   *   (also reads vB + 1)
+   */
+  uint32_t vreg_target_object = inst->VRegA_22x(inst_data);
+  uint32_t vreg_source_closure = inst->VRegB_22x();
+
+  ArtMethod* const closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame,
+                                                                      vreg_source_closure);
+
+  // Failed lambda target runtime check, an exception was raised.
+  if (UNLIKELY(closure_method == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  // Convert the ArtMethod into a java.lang.reflect.Method which will serve
+  // as the temporary 'boxed' version of the lambda. This is good enough
+  // to check all the basic object identities that a boxed lambda must retain.
+
+  // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class
+  // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object
+  // TODO: Repeated boxing should return the same object reference
+  mirror::Method* method_as_object =
+      mirror::Method::CreateFromArtMethod(self, closure_method);
+
+  if (UNLIKELY(method_as_object == nullptr)) {
+    // Most likely an OOM has occurred.
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  shadow_frame.SetVRegReference(vreg_target_object, method_as_object);
+  return true;
+}
+
+template <bool _do_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+static inline bool DoUnboxLambda(Thread* self ATTRIBUTE_UNUSED,
+                                 ShadowFrame& shadow_frame,
+                                 const Instruction* inst,
+                                 uint16_t inst_data) {
+  /*
+   * unbox-lambda vA, vB, [type id] /// opcode 0xf9, format 22c
+   * - vA is the target register where the closure will be written into
+   *   (also writes vA + 1)
+   * - vB is the Object representation of the closure (made by box-lambda)
+   */
+  uint32_t vreg_target_closure = inst->VRegA_22c(inst_data);
+  uint32_t vreg_source_object = inst->VRegB_22c();
+
+  // Raise NullPointerException if object is null
+  mirror::Object* boxed_closure_object = shadow_frame.GetVRegReference(vreg_source_object);
+  if (UNLIKELY(boxed_closure_object == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    return false;
+  }
+
+  // Raise ClassCastException if object is not instanceof java.lang.reflect.Method
+  if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) {
+    ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass());
+    return false;
+  }
+
+  // TODO(iam): We must check that the closure object extends/implements the type
+  // specified in [type id]. This is not currently implemented since it's always a Method.
+
+  // If we got this far, the inputs are valid.
+  // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target.
+  mirror::AbstractMethod* boxed_closure_as_method =
+      down_cast<mirror::AbstractMethod*>(boxed_closure_object);
+
+  ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod();
+  DCHECK(unboxed_closure != nullptr);
+
+  WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure);
+  return true;
+}
+
 uint32_t FindNextInstructionFollowingException(Thread* self, ShadowFrame& shadow_frame,
     uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -561,6 +669,26 @@
 EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
 #undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
 
+// Explicitly instantiate all DoBoxLambda functions.
+#define EXPLICIT_DO_BOX_LAMBDA_DECL(_do_check)                                                \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
+bool DoBoxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
+                            uint16_t inst_data);
+
+EXPLICIT_DO_BOX_LAMBDA_DECL(false);  // box-lambda
+EXPLICIT_DO_BOX_LAMBDA_DECL(true);   // box-lambda
+#undef EXPLICIT_DO_BOX_LAMBDA_DECL
+
+// Explicitly instantiate all DoUnboxLambda functions.
+#define EXPLICIT_DO_UNBOX_LAMBDA_DECL(_do_check)                                                \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                            \
+bool DoUnboxLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
+                              uint16_t inst_data);
+
+EXPLICIT_DO_UNBOX_LAMBDA_DECL(false);  // unbox-lambda
+EXPLICIT_DO_UNBOX_LAMBDA_DECL(true);   // unbox-lambda
+#undef EXPLICIT_DO_UNBOX_LAMBDA_DECL
+
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 7bc8c15..ec923b6 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -2415,6 +2415,18 @@
   }
   HANDLE_EXPERIMENTAL_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(BOX_LAMBDA) {
+    bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(UNBOX_LAMBDA) {
+    bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2459,14 +2471,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F8)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
-  HANDLE_INSTRUCTION_START(UNUSED_F9)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_FA)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 8040197..78090bb 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -2245,7 +2245,7 @@
       }
       case Instruction::UNUSED_F4:
       case Instruction::UNUSED_F5:
-      case Instruction::UNUSED_F7 ... Instruction::UNUSED_F9: {
+      case Instruction::UNUSED_F7: {
         if (!IsExperimentalInstructionEnabled(inst)) {
           UnexpectedOpcode(inst, shadow_frame);
         }
@@ -2253,6 +2253,26 @@
         CHECK(false);  // TODO(iam): Implement opcodes for lambdas
         break;
       }
+      case Instruction::BOX_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoBoxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::UNBOX_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoUnboxLambda<do_access_check>(self, shadow_frame, inst, inst_data);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
       case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8c9222f..9ffa4c2 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -757,7 +757,7 @@
 }
 
 inline void Class::SetSlowPath(bool enabled) {
-  SetFieldBoolean<false>(GetSlowPathFlagOffset(), enabled);
+  SetFieldBoolean<false, false>(GetSlowPathFlagOffset(), enabled);
 }
 
 inline void Class::InitializeClassVisitor::operator()(
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 551e7e2..ba0a9fc 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1030,10 +1030,14 @@
   }
 
   static Class* GetJavaLangClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(!java_lang_Class_.IsNull());
+    DCHECK(HasJavaLangClass());
     return java_lang_Class_.Read();
   }
 
+  static bool HasJavaLangClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return !java_lang_Class_.IsNull();
+  }
+
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
   static void SetClassClass(Class* java_lang_Class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void ResetClass();
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 8d9c08d..b689057 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -162,8 +162,8 @@
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   return down_cast<String*>(
-      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, string_class, size,
-                                                             allocator_type, pre_fence_visitor));
+      heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, string_class, size,
+                                                            allocator_type, pre_fence_visitor));
 }
 
 template <bool kIsInstrumented>
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7a78928..d0f01b3 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -144,7 +144,10 @@
 };
 
 Runtime::Runtime()
-    : instruction_set_(kNone),
+    : resolution_method_(nullptr),
+      imt_conflict_method_(nullptr),
+      imt_unimplemented_method_(nullptr),
+      instruction_set_(kNone),
       compiler_callbacks_(nullptr),
       is_zygote_(false),
       must_relocate_(false),
@@ -873,6 +876,7 @@
                        xgc_option.verify_pre_gc_rosalloc_,
                        xgc_option.verify_pre_sweeping_rosalloc_,
                        xgc_option.verify_post_gc_rosalloc_,
+                       xgc_option.gcstress_,
                        runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM),
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
   ATRACE_END();
@@ -1378,6 +1382,7 @@
 void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) {
   intern_table_->VisitRoots(visitor, flags);
   class_linker_->VisitRoots(visitor, flags);
+  heap_->VisitAllocationRecords(visitor);
   if ((flags & kVisitRootFlagNewRoots) == 0) {
     // Guaranteed to have no new roots in the constant roots.
     VisitConstantRoots(visitor);
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index a680302..d8d21aa 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -22,7 +22,7 @@
 namespace art {
 
 template<typename T>
-class StrideIterator : public std::iterator<std::random_access_iterator_tag, T> {
+class StrideIterator : public std::iterator<std::forward_iterator_tag, T> {
  public:
   StrideIterator(const StrideIterator&) = default;
   StrideIterator(StrideIterator&&) = default;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index de51fe0..ddd1caa 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -172,6 +172,15 @@
                                                         bool allow_soft_failures,
                                                         std::string* error) {
   DCHECK(class_def != nullptr);
+
+  // A class must not be abstract and final.
+  if ((class_def->access_flags_ & (kAccAbstract | kAccFinal)) == (kAccAbstract | kAccFinal)) {
+    *error = "Verifier rejected class ";
+    *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
+    *error += ": class is abstract and final.";
+    return kHardFailure;
+  }
+
   const uint8_t* class_data = dex_file->GetClassData(*class_def);
   if (class_data == nullptr) {
     // empty class, probably a marker interface
@@ -1553,6 +1562,27 @@
   return true;
 }
 
+// Returns the index of the first final instance field of the given class, or kDexNoIndex if there
+// is no such field.
+static uint32_t GetFirstFinalInstanceFieldIndex(const DexFile& dex_file, uint16_t type_idx) {
+  const DexFile::ClassDef* class_def = dex_file.FindClassDef(type_idx);
+  DCHECK(class_def != nullptr);
+  const uint8_t* class_data = dex_file.GetClassData(*class_def);
+  DCHECK(class_data != nullptr);
+  ClassDataItemIterator it(dex_file, class_data);
+  // Skip static fields.
+  while (it.HasNextStaticField()) {
+    it.Next();
+  }
+  while (it.HasNextInstanceField()) {
+    if ((it.GetFieldAccessFlags() & kAccFinal) != 0) {
+      return it.GetMemberIndex();
+    }
+    it.Next();
+  }
+  return DexFile::kDexNoIndex;
+}
+
 bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) {
   // If we're doing FindLocksAtDexPc, check whether we're at the dex pc we care about.
   // We want the state _before_ the instruction, for the case where the dex pc we're
@@ -2767,6 +2797,17 @@
     case Instruction::RETURN_VOID_NO_BARRIER:
       if (IsConstructor() && !IsStatic()) {
         auto& declaring_class = GetDeclaringClass();
+        if (declaring_class.IsUnresolvedReference()) {
+          // We must iterate over the fields, even if we cannot use mirror classes to do so. Do it
+          // manually over the underlying dex file.
+          uint32_t first_index = GetFirstFinalInstanceFieldIndex(*dex_file_,
+              dex_file_->GetMethodId(dex_method_idx_).class_idx_);
+          if (first_index != DexFile::kDexNoIndex) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected for field "
+                              << first_index;
+          }
+          break;
+        }
         auto* klass = declaring_class.GetClass();
         for (uint32_t i = 0, num_fields = klass->NumInstanceFields(); i < num_fields; ++i) {
           if (klass->GetInstanceField(i)->IsFinal()) {
@@ -2853,11 +2894,29 @@
       break;
     }
 
-    case 0xf4:
-    case 0xf5:
-    case 0xf7 ... 0xf9: {
+    case Instruction::UNUSED_F4:
+    case Instruction::UNUSED_F5:
+    case Instruction::UNUSED_F7: {
       DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
-      FALLTHROUGH_INTENDED;  // Conservatively fail verification on release builds.
+      // Conservatively fail verification on release builds.
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
+      break;
+    }
+
+    case Instruction::BOX_LAMBDA: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement box-lambda verification
+      break;
+    }
+
+    case Instruction::UNBOX_LAMBDA: {
+      // Don't bother verifying, instead the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement unbox-lambda verification
+      break;
     }
 
     /* These should never appear during verification. */
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index 4914ba2..f41ff2a 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -43,24 +43,14 @@
 
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
-        // TODO: Temporary fix. Keep the new objects live so they are not garbage collected.
-        // This will cause OOM exception for GC stress tests. The root cause is changed behaviour of
-        // getRecentAllocations(). Working on restoring its old behaviour. b/20037135
-        Object[] objects = new Object[overflowAllocations];
         for (int i = 0; i < overflowAllocations; i++) {
-            objects[i] = new Object();
+            new Object();
         }
         Allocations after = new Allocations(DdmVmInternal.getRecentAllocations());
         System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations));
         System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries));
         System.out.println("after.numberOfEntries=" + after.numberOfEntries);
 
-        // TODO: Temporary fix as above. b/20037135
-        objects = null;
-        Runtime.getRuntime().gc();
-        final int fillerStrings = 16 * 1024;
-        String[] strings = new String[fillerStrings];
-
         System.out.println("Disable and confirm back to empty");
         DdmVmInternal.enableRecentAllocations(false);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
@@ -76,8 +66,8 @@
         System.out.println("Confirm we can reenable twice in a row without losing allocations");
         DdmVmInternal.enableRecentAllocations(true);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
-        for (int i = 0; i < fillerStrings; i++) {
-            strings[i] = new String("fnord");
+        for (int i = 0; i < 16 * 1024; i++) {
+            new String("fnord");
         }
         Allocations first = new Allocations(DdmVmInternal.getRecentAllocations());
         DdmVmInternal.enableRecentAllocations(true);
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
index ed1f875..0a5b5fd 100644
--- a/test/955-lambda-smali/expected.txt
+++ b/test/955-lambda-smali/expected.txt
@@ -2,3 +2,7 @@
 Hello world! (0-args, no closure)
 ABCD Hello world! (4-args, no closure)
 Caught NPE
+(BoxUnbox) Hello boxing world! (0-args, no closure)
+(BoxUnbox) Caught NPE for unbox-lambda
+(BoxUnbox) Caught NPE for box-lambda
+(BoxUnbox) Caught ClassCastException for unbox-lambda
diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali
new file mode 100644
index 0000000..5e66733
--- /dev/null
+++ b/test/955-lambda-smali/smali/BoxUnbox.smali
@@ -0,0 +1,118 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LBoxUnbox;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 2
+    # Trivial 0-arg hello world
+    create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    invoke-static {}, LBoxUnbox;->testFailures()V
+    invoke-static {}, LBoxUnbox;->testFailures2()V
+    invoke-static {}, LBoxUnbox;->testFailures3()V
+
+    return-void
+.end method
+
+#TODO: should use a closure type instead of ArtMethod.
+.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    .registers 3 # 1 parameters, 2 locals
+
+    const-string v0, "(BoxUnbox) Hello boxing world! (0-args, no closure)"
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+# Test that unbox-lambda throws NullPointerException when given a null reference
+.method private static testFailures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
+:start
+    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    # attempting to unbox a null lambda will throw NPE
+:end
+    return-void
+
+:handler
+    const-string v2, "(BoxUnbox) Caught NPE for unbox-lambda"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
+
+# Test that box-lambda throws NullPointerException when given a null lambda
+.method private static testFailures2()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
+:start
+    box-lambda v2, v0  # attempting to box a null lambda will throw NPE
+:end
+    return-void
+
+    # TODO: refactor testFailures using a goto
+
+:handler
+    const-string v2, "(BoxUnbox) Caught NPE for box-lambda"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
+
+# Test that unbox-lambda throws ClassCastException when given a non-lambda object (a String)
+.method private static testFailures3()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const-string v0, "This is not a boxed lambda"
+:start
+    # TODO: use \FunctionalType; here instead
+    unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod;
+    # can't use a string, expects a lambda object here. throws ClassCastException.
+:end
+    return-void
+
+    # TODO: refactor testFailures using a goto
+
+:handler
+    const-string v2, "(BoxUnbox) Caught ClassCastException for unbox-lambda"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/ClassCastException; {:start .. :end} :handler
+.end method
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
index 1851399..92afd79 100644
--- a/test/955-lambda-smali/smali/Main.smali
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -22,6 +22,7 @@
 
     invoke-static {}, LSanityCheck;->run()I
     invoke-static {}, LTrivialHelloWorld;->run()V
+    invoke-static {}, LBoxUnbox;->run()V
 
 # TODO: add tests when verification fails
 
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 60165d9..ac9656b 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -229,10 +229,19 @@
 
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
-# 098-ddmc is broken until we restore the old behavior of getRecentAllocation() of DDMS. b/20037135
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES))
+# Tests that are broken with GC stress.
+# 098-ddmc is broken until the allocation tracker stops marking recently allocated objects as roots.
+# Marking them as roots keeps behavior consistent with DDMS's getRecentAllocations(). b/20037135
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
+  098-ddmc
+
+ifneq (,$(filter gcstress,$(GC_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \
+      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
 
 # 115-native-bridge setup is complicated. Need to implement it correctly for the target.
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \