Save/restore local reference table state on JNI down calls.

Also add a unit test.
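
The generated JNI stub now spills the segment state of the thread's
local indirect reference table into a dedicated frame slot on entry,
and writes it back after the native call returns; restoring the state
releases any local references the native code created. A rough C++
sketch of what the emitted code does (the stub works on raw offsets;
the Thread::Current()->jni_env access is assumed for illustration):

    JNIEnvExt* env = Thread::Current()->jni_env;
    uint32_t* state = reinterpret_cast<uint32_t*>(
        reinterpret_cast<byte*>(env) +
        JNIEnvExt::SegmentStateOffset().Int32Value());
    uint32_t saved = *state;  // saved in the frame on entry (step 8)
    // ... native method runs, possibly creating local references ...
    *state = saved;           // restored before returning (step 15)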

Change-Id: Ia1dc54eaac20c4bbb1ca3d9ac2933d6ab0241261
diff --git a/src/assembler.h b/src/assembler.h
index 0808e06..7e14f3a 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -360,6 +360,15 @@
   virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch,
                     unsigned int size) = 0;
 
+  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(ThreadOffset dest_base, Offset dest_offset, FrameOffset src,
+                    ManagedRegister scratch, ManagedRegister scratch2, size_t size) = 0;
+
   virtual void MemoryBarrier(ManagedRegister scratch) = 0;
 
   // Exploit fast access in managed code to Thread::Current()
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index 6d51867..6c57bad 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -1645,28 +1645,45 @@
   }
 }
 
-void ArmAssembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch, size_t size) {
+void ArmAssembler::Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister());
   CHECK(size == 4 || size == 8);
   if (size == 4) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                   SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                  SP, dest.Int32Value());
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
   } else if (size == 8) {
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                   SP, src.Int32Value());
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                  SP, dest.Int32Value());
-    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
-                   SP, src.Int32Value() + 4);
-    StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
-                  SP, dest.Int32Value() + 4);
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value());
+    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
+    LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), SP, src.Int32Value() + 4);
+    StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value() + 4);
   }
 }
 
+void ArmAssembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                        ManagedRegister mscratch, size_t size) {
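+  // *(SP + dest) = *(src_base + src_offset)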
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  LoadFromOffset(kLoadWord, scratch, src_base.AsArm().AsCoreRegister(), src_offset.Int32Value());
+  StoreToOffset(kStoreWord, scratch, SP, dest.Int32Value());
+}
+
+void ArmAssembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                        ManagedRegister mscratch, size_t size) {
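+  // Unused on ARM: the JNI compiler only needs this variant when the JNIEnv* lives on the stack.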
+  UNIMPLEMENTED(FATAL);
+}
+
+void ArmAssembler::Copy(ThreadOffset dest_base, Offset dest_offset, FrameOffset src,
+                        ManagedRegister mscratch, ManagedRegister mscratch2, size_t size) {
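+  // *(*(TR + dest_base) + dest_offset) = *(SP + src)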
+  Register scratch = mscratch.AsArm().AsCoreRegister();
+  Register scratch2 = mscratch2.AsArm().AsCoreRegister();
+  CHECK_EQ(size, 4u);
+  LoadFromOffset(kLoadWord, scratch, TR, dest_base.Int32Value());
+  LoadFromOffset(kLoadWord, scratch2, SP, src.Int32Value());
+  StoreToOffset(kStoreWord, scratch2, scratch, dest_offset.Int32Value());
+}
+
 void ArmAssembler::MemoryBarrier(ManagedRegister mscratch) {
 #if ANDROID_SMP != 0
 #if defined(__ARM_HAVE_DMB)
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index 577fd55..69b39c0 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -481,6 +481,15 @@
   virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch,
                     unsigned int size);
 
+  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(ThreadOffset dest_base, Offset dest_offset, FrameOffset src,
+                    ManagedRegister scratch, ManagedRegister scratch2, size_t size);
+
   virtual void MemoryBarrier(ManagedRegister scratch);
 
   // Exploit fast access in managed code to Thread::Current()
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index 389fc2c..1fce408 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -1585,6 +1585,30 @@
   }
 }
 
+void X86Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                        ManagedRegister scratch, size_t size) {
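+  // Unused on x86: the JNI compiler only needs this variant when the JNIEnv* is in a register.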
+  UNIMPLEMENTED(FATAL);
+}
+
+void X86Assembler::Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                        ManagedRegister mscratch, size_t size) {
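+  // *(ESP + dest) = *(*(ESP + src_base) + src_offset)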
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK_EQ(size, 4u);
+  movl(scratch, Address(ESP, src_base));
+  movl(scratch, Address(scratch, src_offset));
+  movl(Address(ESP, dest), scratch);
+}
+
+void X86Assembler::Copy(ThreadOffset dest_base, Offset dest_offset, FrameOffset src,
+                        ManagedRegister mscratch, ManagedRegister mscratch2, size_t size) {
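+  // *(*(fs:dest_base) + dest_offset) = *(ESP + src)
+  // With no second scratch register free on x86, the value moves through the stack via push/pop.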
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  CHECK(mscratch2.IsNoRegister());
+  CHECK_EQ(size, 4u);
+  fs()->movl(scratch, Address::Absolute(dest_base));
+  pushl(Address(ESP, src));
+  popl(Address(scratch, dest_offset));
+}
+
 void X86Assembler::MemoryBarrier(ManagedRegister) {
 #if ANDROID_SMP != 0
   EmitUint8(0x0F);  // mfence
diff --git a/src/assembler_x86.h b/src/assembler_x86.h
index 7152e14..5936aac 100644
--- a/src/assembler_x86.h
+++ b/src/assembler_x86.h
@@ -505,6 +505,15 @@
   virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch,
                     size_t size);
 
+  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
+                    ManagedRegister scratch, size_t size);
+
+  virtual void Copy(ThreadOffset dest_base, Offset dest_offset, FrameOffset src,
+                    ManagedRegister scratch, ManagedRegister scratch2, size_t size);
+
   virtual void MemoryBarrier(ManagedRegister);
 
   // Exploit fast access in managed code to Thread::Current()
diff --git a/src/calling_convention.cc b/src/calling_convention.cc
index 49b4348..c0556c5 100644
--- a/src/calling_convention.cc
+++ b/src/calling_convention.cc
@@ -77,17 +77,22 @@
   }
 }
 
-size_t JniCallingConvention::ReferenceCount() {
+size_t JniCallingConvention::ReferenceCount() const {
   const Method* method = GetMethod();
   return method->NumReferenceArgs() + (method->IsStatic() ? 1 : 0);
 }
 
-FrameOffset JniCallingConvention::ReturnValueSaveLocation() {
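+// The segment state is spilled just above the SIRT references (see the frame layout in calling_convention.h).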
+FrameOffset JniCallingConvention::LocalReferenceTable_SegmentStatesOffset() const {
+  size_t start_of_sirt = SirtLinkOffset().Int32Value() + kPointerSize;
   size_t references_size = kPointerSize * ReferenceCount();  // size excluding header
   return FrameOffset(start_of_sirt + references_size);
 }
 
+FrameOffset JniCallingConvention::ReturnValueSaveLocation() const {
+  // Segment state is 4 bytes long
+  return FrameOffset(LocalReferenceTable_SegmentStatesOffset().Int32Value() + 4);
+}
+
 bool JniCallingConvention::HasNext() {
   if (itr_args_ <= kObjectOrClass) {
     return true;
diff --git a/src/calling_convention.h b/src/calling_convention.h
index 8f2a79c..cf03bb8 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -105,6 +105,7 @@
 // | { Return address }              |
 // | { Callee saves }                |     ([1])
 // | { Return value spill }          |     (live on return slow paths)
+// | { Local Ref. Table State }      |
 // | { Stack Indirect Ref. Table     |
 // |   num. refs./link }             |     (here to prior SP is frame size)
 // | { Method* }                     | <-- Anchor SP written to thread
@@ -127,10 +128,12 @@
   // Size of outgoing arguments, including alignment
   virtual size_t OutArgSize() = 0;
   // Number of references in stack indirect reference table
-  size_t ReferenceCount();
+  size_t ReferenceCount() const;
+  // Location where the segment state of the local indirect reference table is saved
+  FrameOffset LocalReferenceTable_SegmentStatesOffset() const;
   // Location where the return value of a call can be squirreled if another
   // call is made following the native call
-  FrameOffset ReturnValueSaveLocation();
+  FrameOffset ReturnValueSaveLocation() const;
 
   // Callee save registers to spill prior to native code (which may clobber)
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
@@ -143,6 +146,9 @@
   // set up
   virtual bool IsMethodRegisterClobberedPreCall() = 0;
 
+  // An extra scratch register that remains usable after the call, while the return value is live
+  virtual ManagedRegister ReturnScratchRegister() const = 0;
+
   // Iterator interface
   bool HasNext();
   virtual void Next();
@@ -157,15 +163,15 @@
   FrameOffset CurrentParamSirtEntryOffset();
 
   // Position of SIRT and interior fields
-  FrameOffset SirtOffset() {
+  FrameOffset SirtOffset() const {
     return FrameOffset(displacement_.Int32Value() +
                        kPointerSize);  // above Method*
   }
-  FrameOffset SirtNumRefsOffset() {
+  FrameOffset SirtNumRefsOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
                        StackIndirectReferenceTable::NumberOfReferencesOffset());
   }
-  FrameOffset SirtLinkOffset() {
+  FrameOffset SirtLinkOffset() const {
     return FrameOffset(SirtOffset().Int32Value() +
                        StackIndirectReferenceTable::LinkOffset());
   }
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index ed876f0..ae44b83 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -150,14 +150,17 @@
   return result;
 }
 
+ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
+  return ArmManagedRegister::FromCoreRegister(R2);
+}
+
 size_t ArmJniCallingConvention::FrameSize() {
-  // Method*, LR and callee save area size
-  size_t frame_data_size = (2 + CalleeSaveRegisters().size()) * kPointerSize;
+  // Method*, LR and callee save area size, local reference segment state
+  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kPointerSize;
   // References plus 2 words for SIRT header
   size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(),
-                 kStackAlignment);
+  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t ArmJniCallingConvention::OutArgSize() {
diff --git a/src/calling_convention_arm.h b/src/calling_convention_arm.h
index 3d513fe..984250e 100644
--- a/src/calling_convention_arm.h
+++ b/src/calling_convention_arm.h
@@ -42,6 +42,7 @@
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
     return callee_save_regs_;
   }
+  virtual ManagedRegister ReturnScratchRegister() const;
   virtual uint32_t CoreSpillMask() const;
   virtual uint32_t FpSpillMask() const {
     return 0;  // Floats aren't spilled in JNI down call
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index 9598647..ad7921d 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -18,6 +18,10 @@
   return X86ManagedRegister::FromCpuRegister(ECX);
 }
 
+ManagedRegister X86JniCallingConvention::ReturnScratchRegister() const {
+  return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
+}
+
 static ManagedRegister ReturnRegisterForMethod(Method* method) {
   if (method->IsReturnAFloatOrDouble()) {
     return X86ManagedRegister::FromX87Register(ST0);
@@ -68,13 +72,12 @@
 std::vector<ManagedRegister> X86JniCallingConvention::callee_save_regs_;
 
 size_t X86JniCallingConvention::FrameSize() {
-  // Return address and Method*
-  size_t frame_data_size = 2 * kPointerSize;
+  // Return address, Method* and local reference segment state
+  size_t frame_data_size = 3 * kPointerSize;
   // References plus 2 words for SIRT header
   size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
   // Plus return value spill area size
-  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(),
-                 kStackAlignment);
+  return RoundUp(frame_data_size + sirt_size + SizeOfReturnValue(), kStackAlignment);
 }
 
 size_t X86JniCallingConvention::OutArgSize() {
diff --git a/src/calling_convention_x86.h b/src/calling_convention_x86.h
index c3bdc40..cb27ef6 100644
--- a/src/calling_convention_x86.h
+++ b/src/calling_convention_x86.h
@@ -43,6 +43,7 @@
     DCHECK(callee_save_regs_.empty());
     return callee_save_regs_;
   }
+  virtual ManagedRegister ReturnScratchRegister() const;
   virtual uint32_t CoreSpillMask() const {
     return 0;
   }
diff --git a/src/indirect_reference_table.cc b/src/indirect_reference_table.cc
index e3632a6..ed27bf4 100644
--- a/src/indirect_reference_table.cc
+++ b/src/indirect_reference_table.cc
@@ -48,7 +48,7 @@
   slot_data_ = reinterpret_cast<IndirectRefSlot*>(calloc(initialCount, sizeof(IndirectRefSlot)));
   CHECK(slot_data_ != NULL);
 
-  segmentState.all = IRT_FIRST_SEGMENT;
+  segment_state_.all = IRT_FIRST_SEGMENT;
   alloc_entries_ = initialCount;
   max_entries_ = maxCount;
   kind_ = desiredKind;
@@ -81,14 +81,14 @@
 IndirectRef IndirectReferenceTable::Add(uint32_t cookie, const Object* obj) {
   IRTSegmentState prevState;
   prevState.all = cookie;
-  size_t topIndex = segmentState.parts.topIndex;
+  size_t topIndex = segment_state_.parts.topIndex;
 
   DCHECK(obj != NULL);
   // TODO: stronger sanity check on the object (such as in heap)
   DCHECK(IsAligned(reinterpret_cast<intptr_t>(obj), 8));
   DCHECK(table_ != NULL);
   DCHECK_LE(alloc_entries_, max_entries_);
-  DCHECK_GE(segmentState.parts.numHoles, prevState.parts.numHoles);
+  DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
   if (topIndex == alloc_entries_) {
     /* reached end of allocated space; did we hit buffer max? */
@@ -128,7 +128,7 @@
    * add to the end of the list.
    */
   IndirectRef result;
-  int numHoles = segmentState.parts.numHoles - prevState.parts.numHoles;
+  int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles;
   if (numHoles > 0) {
     DCHECK_GT(topIndex, 1U);
     /* find the first hole; likely to be near the end of the list */
@@ -140,13 +140,13 @@
     UpdateSlotAdd(obj, pScan - table_);
     result = ToIndirectRef(obj, pScan - table_);
     *pScan = obj;
-    segmentState.parts.numHoles--;
+    segment_state_.parts.numHoles--;
   } else {
     /* add to the end */
     UpdateSlotAdd(obj, topIndex);
     result = ToIndirectRef(obj, topIndex);
     table_[topIndex++] = obj;
-    segmentState.parts.topIndex = topIndex;
+    segment_state_.parts.topIndex = topIndex;
   }
 
   DCHECK(result != NULL);
@@ -169,7 +169,7 @@
     return false;
   }
 
-  int topIndex = segmentState.parts.topIndex;
+  int topIndex = segment_state_.parts.topIndex;
   int idx = ExtractIndex(iref);
   if (idx >= topIndex) {
     /* bad -- stale reference? */
@@ -201,7 +201,7 @@
 }
 
 bool IndirectReferenceTable::Contains(IndirectRef iref) const {
-  return LinearScan(iref, 0, segmentState.parts.topIndex, table_) != -1;
+  return LinearScan(iref, 0, segment_state_.parts.topIndex, table_) != -1;
 }
 
 /*
@@ -220,12 +220,12 @@
 bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) {
   IRTSegmentState prevState;
   prevState.all = cookie;
-  int topIndex = segmentState.parts.topIndex;
+  int topIndex = segment_state_.parts.topIndex;
   int bottomIndex = prevState.parts.topIndex;
 
   DCHECK(table_ != NULL);
   DCHECK_LE(alloc_entries_, max_entries_);
-  DCHECK_GE(segmentState.parts.numHoles, prevState.parts.numHoles);
+  DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
   int idx = ExtractIndex(iref);
 
@@ -257,7 +257,7 @@
     }
 
     table_[idx] = NULL;
-    int numHoles = segmentState.parts.numHoles - prevState.parts.numHoles;
+    int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles;
     if (numHoles != 0) {
       while (--topIndex > bottomIndex && numHoles != 0) {
         //LOG(INFO) << "+++ checking for hole at " << topIndex-1 << " (cookie=" << cookie << ") val=" << table_[topIndex-1];
@@ -267,10 +267,10 @@
         //LOG(INFO) << "+++ ate hole at " << (topIndex-1);
         numHoles--;
       }
-      segmentState.parts.numHoles = numHoles + prevState.parts.numHoles;
-      segmentState.parts.topIndex = topIndex;
+      segment_state_.parts.numHoles = numHoles + prevState.parts.numHoles;
+      segment_state_.parts.topIndex = topIndex;
     } else {
-      segmentState.parts.topIndex = topIndex-1;
+      segment_state_.parts.topIndex = topIndex-1;
       //LOG(INFO) << "+++ ate last entry " << topIndex-1;
     }
   } else {
@@ -288,8 +288,8 @@
     }
 
     table_[idx] = NULL;
-    segmentState.parts.numHoles++;
-    //LOG(INFO) << "+++ left hole at " << idx << ", holes=" << segmentState.parts.numHoles;
+    segment_state_.parts.numHoles++;
+    //LOG(INFO) << "+++ left hole at " << idx << ", holes=" << segment_state_.parts.numHoles;
   }
 
   return true;
diff --git a/src/indirect_reference_table.h b/src/indirect_reference_table.h
index a53983f..5595ae7 100644
--- a/src/indirect_reference_table.h
+++ b/src/indirect_reference_table.h
@@ -297,7 +297,7 @@
    * so may be larger than the actual number of "live" entries.
    */
   size_t Capacity() const {
-    return segmentState.parts.topIndex;
+    return segment_state_.parts.topIndex;
   }
 
   iterator begin() {
@@ -310,6 +310,9 @@
 
   void VisitRoots(Heap::RootVisitor* visitor, void* arg);
 
+  static Offset SegmentStateOffset() {
+    return Offset(OFFSETOF_MEMBER(IndirectReferenceTable, segment_state_));
+  }
+
  private:
   /*
    * Extract the table index from an indirect reference.
@@ -348,8 +351,8 @@
   bool GetChecked(IndirectRef) const;
   bool CheckEntry(const char*, IndirectRef, int) const;
 
-  /* semi-public - read/write by interpreter in native call handler */
-  IRTSegmentState segmentState;
+  /* semi-public - read/write by jni down calls */
+  IRTSegmentState segment_state_;
 
   /* bottom of the stack */
   const Object** table_;
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index b072833..6404b05 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -240,8 +240,7 @@
       mr_conv->Next();
       jni_conv->Next();
     }
-    CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size,
-                  out_arg_size);
+    CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size, out_arg_size);
   }
 
   if (is_static) {
@@ -261,15 +260,21 @@
                          ManagedRegister::NoRegister(), false);
     }
   }
-  // 8. Create 1st argument, the JNI environment ptr
+  // 8. Create 1st argument, the JNI environment ptr, and save the local reference table's segment state
   jni_conv->ResetIterator(FrameOffset(out_arg_size));
+  // Load the JNIEnv* (in a register or on the stack) and snapshot the locals' segment state through it
   if (jni_conv->IsCurrentParamInRegister()) {
-    __ LoadRawPtrFromThread(jni_conv->CurrentParamRegister(),
-                            Thread::JniEnvOffset());
+    ManagedRegister jni_env = jni_conv->CurrentParamRegister();
+    DCHECK(!jni_env.Equals(jni_conv->InterproceduralScratchRegister()));
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset());
+    __ Copy(jni_conv->LocalReferenceTable_SegmentStatesOffset(), jni_env,
+            JNIEnvExt::SegmentStateOffset(), jni_conv->InterproceduralScratchRegister(), 4);
   } else {
-    __ CopyRawPtrFromThread(jni_conv->CurrentParamStackOffset(),
-                            Thread::JniEnvOffset(),
+    FrameOffset jni_env = jni_conv->CurrentParamStackOffset();
+    __ CopyRawPtrFromThread(jni_env, Thread::JniEnvOffset(),
                             jni_conv->InterproceduralScratchRegister());
+    __ Copy(jni_conv->LocalReferenceTable_SegmentStatesOffset(), jni_env,
+            JNIEnvExt::SegmentStateOffset(), jni_conv->InterproceduralScratchRegister(), 4);
   }
 
   // 9. Plant call to native code associated with method
@@ -381,7 +386,11 @@
   }
   __ Move(mr_conv->ReturnRegister(), jni_conv->ReturnRegister());
 
-  // 15. Remove SIRT from thread
+  // 15. Restore segment state and remove SIRT from thread
+  __ Copy(Thread::JniEnvOffset(), JNIEnvExt::SegmentStateOffset(),
+          jni_conv->LocalReferenceTable_SegmentStatesOffset(),
+          jni_conv->InterproceduralScratchRegister(),
+          jni_conv->ReturnScratchRegister(), 4);
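+  // On x86, ReturnScratchRegister() is NoRegister and the assembler moves the value with push/pop instead.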
   __ CopyRawPtrToThread(Thread::TopSirtOffset(), jni_conv->SirtLinkOffset(),
                         jni_conv->InterproceduralScratchRegister());
 
@@ -427,7 +436,6 @@
                                 ManagedRuntimeCallingConvention* mr_conv,
                                 JniCallingConvention* jni_conv,
                                 size_t frame_size, size_t out_arg_size) {
-
   bool input_in_reg = mr_conv->IsCurrentParamInRegister();
   bool output_in_reg = jni_conv->IsCurrentParamInRegister();
   FrameOffset sirt_offset(0);
@@ -449,7 +457,7 @@
     // as with regular references).
     sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
     // Check SIRT offset is within frame.
-    CHECK_LT(sirt_offset.Uint32Value(), (frame_size+out_arg_size));
+    CHECK_LT(sirt_offset.Uint32Value(), (frame_size + out_arg_size));
   }
 #define __ jni_asm->
   if (input_in_reg && output_in_reg) {
@@ -468,15 +476,13 @@
   } else if (!input_in_reg && !output_in_reg) {
     FrameOffset out_off = jni_conv->CurrentParamStackOffset();
     if (ref_param) {
-      __ CreateSirtEntry(out_off, sirt_offset,
-                         mr_conv->InterproceduralScratchRegister(),
+      __ CreateSirtEntry(out_off, sirt_offset, mr_conv->InterproceduralScratchRegister(),
                          null_allowed);
     } else {
       FrameOffset in_off = mr_conv->CurrentParamStackOffset();
       size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
-      __ Copy(out_off, in_off, mr_conv->InterproceduralScratchRegister(),
-              param_size);
+      __ Copy(out_off, in_off, mr_conv->InterproceduralScratchRegister(), param_size);
     }
   } else if (!input_in_reg && output_in_reg) {
     FrameOffset in_off = mr_conv->CurrentParamStackOffset();
@@ -484,10 +490,9 @@
     // Check that incoming stack arguments are above the current stack frame.
     CHECK_GT(in_off.Uint32Value(), frame_size);
     if (ref_param) {
-      __ CreateSirtEntry(out_reg, sirt_offset,
-                         ManagedRegister::NoRegister(), null_allowed);
+      __ CreateSirtEntry(out_reg, sirt_offset, ManagedRegister::NoRegister(), null_allowed);
     } else {
-      unsigned int param_size = mr_conv->CurrentParamSize();
+      size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
       __ Load(out_reg, in_off, param_size);
     }
@@ -499,8 +504,7 @@
     CHECK_LT(out_off.Uint32Value(), frame_size);
     if (ref_param) {
       // TODO: recycle value in in_reg rather than reload from SIRT
-      __ CreateSirtEntry(out_off, sirt_offset,
-                         mr_conv->InterproceduralScratchRegister(),
+      __ CreateSirtEntry(out_off, sirt_offset, mr_conv->InterproceduralScratchRegister(),
                          null_allowed);
     } else {
       size_t param_size = mr_conv->CurrentParamSize();
@@ -512,8 +516,7 @@
         // store where input straddles registers and stack
         CHECK_EQ(param_size, 8u);
         FrameOffset in_off = mr_conv->CurrentParamStackOffset();
-        __ StoreSpanning(out_off, in_reg, in_off,
-                         mr_conv->InterproceduralScratchRegister());
+        __ StoreSpanning(out_off, in_reg, in_off, mr_conv->InterproceduralScratchRegister());
       }
     }
   }
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index fbdf2bf..c2949c9 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -514,6 +514,23 @@
   EXPECT_TRUE(env_->IsSameObject(result, jobj_));
 }
 
+jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) {
+  // Add 10 local references
+  for (int i = 0; i < 10; i++) {
+    AddLocalReference<jobject>(env, Decode<Object*>(env, thisObj));
+  }
+  return x + 1;
+}
+
+TEST_F(JniCompilerTest, LocalReferenceTableClearingTest) {
+  SetupForTest(false, "fooI", "(I)I", reinterpret_cast<void*>(&local_ref_test));
+  // 1000 invocations of a method that adds 10 local references
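+  // each; without the segment state restore these would accumulate and eventually overflow the table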
+  for (int i = 0; i < 1000; i++) {
+    jint result = env_->CallIntMethod(jobj_, jmethod_, i);
+    EXPECT_EQ(result, i + 1);
+  }
+}
+
 void my_arraycopy(JNIEnv* env, jclass klass, jobject src, jint src_pos, jobject dst, jint dst_pos, jint length) {
   EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jklass_, klass));
   EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jklass_, dst));
diff --git a/src/jni_internal.h b/src/jni_internal.h
index 1d7fd17..392ad24 100644
--- a/src/jni_internal.h
+++ b/src/jni_internal.h
@@ -122,6 +122,11 @@
 
   void DumpReferenceTables();
 
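+  // Byte offset of the locals table's segment state within JNIEnvExt, used by the JNI compiler.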
+  static Offset SegmentStateOffset() {
+    return Offset(OFFSETOF_MEMBER(JNIEnvExt, locals) +
+                  IndirectReferenceTable::SegmentStateOffset().Int32Value());
+  }
+
   Thread* const self;
   JavaVMExt* vm;