Merge "Optimizing/ARM: Improve shifts of long values by a constant."
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7ea6176..a7bd31e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2692,7 +2692,7 @@
   }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetConstant(GetType(), 1);
+    return GetBlock()->GetGraph()->GetIntConstant(1);
   }
 
   DECLARE_INSTRUCTION(Equal);
@@ -2728,7 +2728,7 @@
   }
   HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
                       HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetConstant(GetType(), 0);
+    return GetBlock()->GetGraph()->GetIntConstant(0);
   }
 
   DECLARE_INSTRUCTION(NotEqual);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 95f0ccb..5fd8969 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,7 +809,94 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // RDI: type_idx, RSI: ArtMethod*, RAX: return value
+    // RDX, RCX, R8, R9: free.
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx   // Load dex cache resolved types array
+                                                              // Load the class (edx)
+    movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    testl  %edx, %edx                                         // Check null class
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Check class status.
+    cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // We don't need a fence (between the
+                                                              // status and the access flag loads)
+                                                              // here because every load is a load
+                                                              // acquire on x86.
+                                                              // Check if the access flags have
+                                                              // kAccClassIsFinalizable.
+    testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Check if the thread local
+                                                              // allocation stack has room.
+    movq   %gs:THREAD_SELF_OFFSET, %r8                        // r8 = thread
+    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx     // rcx = alloc stack top.
+    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
+    jae    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Load the object size
+    movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
+                                                              // Check if the size is for a thread
+                                                              // local allocation
+    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
+    ja     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size: round the size up
+                                                              // to the rosalloc bracket quantum
+                                                              // size, divide by the quantum size
+                                                              // and subtract one. The code below
+                                                              // is a shorter but equivalent
+                                                              // version.
+    subq   LITERAL(1), %rax
+    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
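+                                                              // This is equivalent because, for
+                                                              // quantum = 1 << shift and size >= 1,
+                                                              // RoundUp(size, quantum) / quantum - 1
+                                                              // == (size - 1) >> shift.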
+                                                              // Load the rosalloc run (r9)
+    movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
+                                                              // Load the free list head (rax). This
+                                                              // will be the return val.
+    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
+    testq  %rax, %rax
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    movl   %eax, (%rcx)
+    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
+    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
+                                                              // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
+    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF edx
+    movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+                                                              // Decrement the size of the free list
+    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
+                                                              // No fence necessary for x86.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save ref-containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                         // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)               // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                       // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
+END_FUNCTION art_quick_alloc_object_rosalloc
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ab93142..fd2426b 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1164,6 +1164,7 @@
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
+  delete thread_flip_lock_;
   delete pending_task_lock_;
   delete backtrace_lock_;
   if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 3790e53..edcbcf2 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -29,10 +29,27 @@
   static uint8_t* BaseBegin(MemMap* mem_map) {
     return reinterpret_cast<uint8_t*>(mem_map->base_begin_);
   }
+
   static size_t BaseSize(MemMap* mem_map) {
     return mem_map->base_size_;
   }
 
+  static uint8_t* GetValidMapAddress(size_t size, bool low_4gb) {
+    // Find a valid map address and unmap it before returning.
+    std::string error_msg;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("temp",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ,
+                                                     low_4gb,
+                                                     /*reuse*/false,
+                                                     &error_msg));
+    CHECK(map != nullptr);
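+    // The temporary mapping is released when |map| goes out of scope, so the
+    // returned address should be available for the caller to map.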
+    return map->Begin();
+  }
+
   static void RemapAtEndTest(bool low_4gb) {
     std::string error_msg;
     // Cast the page size to size_t.
@@ -189,9 +204,11 @@
 TEST_F(MemMapTest, MapAnonymousExactAddr) {
   CommonInit();
   std::string error_msg;
+  // Find a valid address.
+  uint8_t* valid_address = GetValidMapAddress(kPageSize, /*low_4gb*/false);
   // Map at an address that should work, which should succeed.
   std::unique_ptr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
-                                                    reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
+                                                    valid_address,
                                                     kPageSize,
                                                     PROT_READ | PROT_WRITE,
                                                     false,
@@ -199,7 +216,7 @@
                                                     &error_msg));
   ASSERT_TRUE(map0.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  ASSERT_TRUE(map0->BaseBegin() == reinterpret_cast<void*>(ART_BASE_ADDRESS));
+  ASSERT_TRUE(map0->BaseBegin() == valid_address);
   // Map at an unspecified address, which should succeed.
   std::unique_ptr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
                                                     nullptr,
@@ -237,18 +254,27 @@
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
-    uintptr_t start_addr = ART_BASE_ADDRESS + 0x1000000;
+    constexpr size_t size = 0x100000;
+    // Try all addresses from 2GB up to 4GB.
+    size_t start_addr = 2 * GB;
     std::string error_msg;
-    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
-                                                     reinterpret_cast<uint8_t*>(start_addr),
-                                                     0x21000000,
-                                                     PROT_READ | PROT_WRITE,
-                                                     true,
-                                                     false,
-                                                     &error_msg));
+    std::unique_ptr<MemMap> map;
+    for (; start_addr <= std::numeric_limits<uint32_t>::max() - size; start_addr += size) {
+      map.reset(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
+                                     reinterpret_cast<uint8_t*>(start_addr),
+                                     size,
+                                     PROT_READ | PROT_WRITE,
+                                     /*low_4gb*/true,
+                                     /*reuse*/false,
+                                     &error_msg));
+      if (map != nullptr) {
+        break;
+      }
+    }
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
     ASSERT_TRUE(error_msg.empty());
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
-    ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), start_addr);
+    ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
 }