JIT mini-debug-info: Add support for zygote shared memory

Ensure that zygote's mini-debug-info is visible to apps.

Remove the global seqlock synchronization.
It is replaced by per-entry seqlocks.

Test: test.py -b -r --host --jit --64
Test: device boots
Bug: 119800099
Change-Id: I4885f9a4d44743d5608793a2e5d6453123d111f3
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 24ca0fc..0397ad8 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -46,40 +46,29 @@
 //    method, which is called after every modification of the linked list.
 //    GDB does this, but it is complex to set up and it stops the process.
 //
-// 2) Asynchronously, by monitoring the action_seqlock_.
-//   * The seqlock is a monotonically increasing counter which is incremented
-//     before and after every modification of the linked list. Odd value of
-//     the counter means the linked list is being modified (it is locked).
-//   * The tool should read the value of the seqlock both before and after
-//     copying the linked list.  If the seqlock values match and are even,
-//     the copy is consistent.  Otherwise, the reader should try again.
-//     * Note that using the data directly while is it being modified
-//       might crash the tool.  Therefore, the only safe way is to make
-//       a copy and use the copy only after the seqlock has been checked.
-//     * Note that the process might even free and munmap the data while
-//       it is being copied, therefore the reader should either handle
-//       SEGV or use OS calls to read the memory (e.g. process_vm_readv).
-//   * The seqlock can be used to determine the number of modifications of
-//     the linked list, which can be used to intelligently cache the data.
-//     Note the possible overflow of the seqlock.  It is intentionally
-//     32-bit, since 64-bit atomics can be tricky on some architectures.
-//   * The timestamps on the entry record the time when the entry was
-//     created which is relevant if the unwinding is not live and is
-//     postponed until much later.  All timestamps must be unique.
-//   * Memory barriers are used to make it possible to reason about
-//     the data even when it is being modified (e.g. the process crashed
-//     while that data was locked, and thus it will be never unlocked).
-//     * In particular, it should be possible to:
-//       1) read the seqlock and then the linked list head pointer.
-//       2) copy the entry and check that seqlock has not changed.
-//       3) copy the symfile and check that seqlock has not changed.
-//       4) go back to step 2 using the next pointer (if non-null).
-//       This safely creates copy of all symfiles, although other data
-//       might be inconsistent/unusable (e.g. prev_, action_timestamp_).
-//   * For full conformance with the C++ memory model, all seqlock
-//     protected accesses should be atomic. We currently do this in the
-//     more critical cases. The rest will have to be fixed before
-//     attempting to run TSAN on this code.
+// 2) Asynchronously, using the entry seqlocks.
+//   * The seqlock is a monotonically increasing counter, which
+//     is even if the entry is valid and odd if it is invalid.
+//     It is set to an even value after all other fields are set,
+//     and to an odd value before the entry is deleted.
+//   * This makes it possible to read the symfile data safely:
+//     * The reader should read the value of the seqlock both
+//       before and after reading the symfile.  If the seqlock
+//       values match and are even, the copy is consistent.
+//   * Entries are recycled, but never freed, which guarantees
+//     that the seqlock is not overwritten by a random value.
+//   * The linked list is kept one level above the seqlocks:
+//     the next pointer must always point to an entry with
+//     an even seqlock, so entries of a crashed process can
+//     still be read.  Thus an entry is linked in only after
+//     it is created, and unlinked before it is invalidated (odd).
+//   * When iterating over the linked list, the reader can use
+//     the seqlocks to ensure that the current and next entry
+//     were not deleted, using the following steps:
+//       1) Read the next pointer and the next entry's seqlock.
+//       2) Read the symfile and re-read the next pointer.
+//       3) Re-read both the current and the next seqlock.
+//       4) Go to step 1, using the new entry and seqlock.
 //
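The reader-side steps above are described only in prose. The sketch below is illustrative and not part of this change: a minimal out-of-process reader that copies all symfiles, assuming a 64-bit target and reading the remote memory with process_vm_readv(). The RemoteEntry mirror and ReadRemote() helper are names invented for this example, and the retry logic is simplified (a real reader would restart from the head on any seqlock mismatch).

#include <sys/types.h>
#include <sys/uio.h>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Mirror of JITCodeEntryPublic for a 64-bit target (layout assumption for this example).
struct RemoteEntry {
  uint64_t next_;
  uint64_t prev_;
  uint64_t symfile_addr_;
  uint64_t symfile_size_;
  uint32_t seqlock_;
};

static bool ReadRemote(pid_t pid, uint64_t addr, void* dst, size_t size) {
  struct iovec local = {dst, size};
  struct iovec remote = {reinterpret_cast<void*>(addr), size};
  return process_vm_readv(pid, &local, 1, &remote, 1, 0) == static_cast<ssize_t>(size);
}

// Copies the symfiles of all valid entries reachable from 'head_addr'
// (the address of JITDescriptorPublic::head_ in the target process).
static bool CopySymfiles(pid_t pid, uint64_t head_addr,
                         std::vector<std::vector<uint8_t>>* out) {
  uint64_t entry_addr = 0;
  if (!ReadRemote(pid, head_addr, &entry_addr, sizeof(entry_addr))) return false;
  while (entry_addr != 0) {
    RemoteEntry entry;
    if (!ReadRemote(pid, entry_addr, &entry, sizeof(entry))) return false;
    if ((entry.seqlock_ & 1) != 0) return false;  // head_/next_ must only lead to valid entries.
    std::vector<uint8_t> symfile(entry.symfile_size_);
    if (!symfile.empty() &&
        !ReadRemote(pid, entry.symfile_addr_, symfile.data(), symfile.size())) {
      return false;
    }
    // Re-read the entry: if the seqlock changed, the entry was recycled while we copied it.
    RemoteEntry check;
    if (!ReadRemote(pid, entry_addr, &check, sizeof(check))) return false;
    if (check.seqlock_ != entry.seqlock_) return false;  // A real reader would retry from head_.
    out->push_back(std::move(symfile));
    entry_addr = check.next_;  // Use the re-read next pointer, per step 2 above.
  }
  return true;
}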
 
 namespace art {
@@ -87,6 +76,11 @@
 static Mutex g_jit_debug_lock("JIT native debug entries", kNativeDebugInterfaceLock);
 static Mutex g_dex_debug_lock("DEX native debug entries", kNativeDebugInterfaceLock);
 
+// Most loads and stores need no synchronization since all memory is protected by the global locks.
+// Some writes are synchronized so libunwindstack can read the memory safely from another process.
+constexpr std::memory_order kNonRacingRelaxed = std::memory_order_relaxed;
+
+// Public binary interface between ART and native tools (gdb, libunwind, etc.).
 extern "C" {
   enum JITAction {
     JIT_NOACTION = 0,
@@ -96,15 +90,13 @@
 
   // Public/stable binary interface.
   struct JITCodeEntryPublic {
-    // Atomic to ensure the reader can always iterate over the linked list
-    // (e.g. the process could crash in the middle of writing this field).
-    std::atomic<const JITCodeEntry*> next_;
-    const JITCodeEntry* prev_;     // For linked list deletion.  Unused in readers.
-    const uint8_t* symfile_addr_;  // Address of the in-memory ELF file.
-    uint64_t symfile_size_;        // Beware of the offset (12 on x86; but 16 on ARM32).
+    std::atomic<const JITCodeEntry*> next_;  // Atomic to guarantee consistency after a crash.
+    const JITCodeEntry* prev_ = nullptr;     // For linked list deletion.  Unused in readers.
+    const uint8_t* symfile_addr_ = nullptr;  // Address of the in-memory ELF file.
+    uint64_t symfile_size_ = 0;              // Note that the offset is 12 on x86, but 16 on ARM32.
 
     // Android-specific fields:
-    uint64_t register_timestamp_;  // CLOCK_MONOTONIC time of entry registration.
+    std::atomic_uint32_t seqlock_{1};        // Synchronization. Even value if entry is valid.
   };
 
   // Implementation-specific fields (which can be used only in this file).
@@ -120,26 +112,34 @@
     bool is_compressed_ = false;
   };
 
-  struct JITDescriptor {
+  // Public/stable binary interface.
+  struct JITDescriptorPublic {
     uint32_t version_ = 1;                            // NB: GDB supports only version 1.
     uint32_t action_flag_ = JIT_NOACTION;             // One of the JITAction enum values.
     const JITCodeEntry* relevant_entry_ = nullptr;    // The entry affected by the action.
     std::atomic<const JITCodeEntry*> head_{nullptr};  // Head of link list of all entries.
-
-    // Android-specific fields:
-    uint8_t magic_[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '1'};
-    uint32_t flags_ = 0;  // Reserved for future use. Must be 0.
-    uint32_t sizeof_descriptor = sizeof(JITDescriptor);
-    uint32_t sizeof_entry = sizeof(JITCodeEntryPublic);
-    std::atomic_uint32_t action_seqlock_{0};  // Incremented before and after any modification.
-    uint64_t action_timestamp_ = 1;           // CLOCK_MONOTONIC time of last action.
   };
 
+  // Implementation-specific fields (which can be used only in this file).
+  struct JITDescriptor : public JITDescriptorPublic {
+    const JITCodeEntry* free_entries_ = nullptr;  // List of deleted entries ready for reuse.
+
+    // Used for memory sharing with zygote. See NativeDebugInfoPreFork().
+    const JITCodeEntry* zygote_head_entry_ = nullptr;
+    JITCodeEntry application_tail_entry_{};
+  };
+
+  // Public interface: Can be used by a reader to check that the structs have the expected size.
+  uint32_t g_art_sizeof_jit_code_entry = sizeof(JITCodeEntryPublic);
+  uint32_t g_art_sizeof_jit_descriptor = sizeof(JITDescriptorPublic);
+
   // Check that std::atomic has the expected layout.
   static_assert(alignof(std::atomic_uint32_t) == alignof(uint32_t), "Weird alignment");
   static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t), "Weird size");
+  static_assert(std::atomic_uint32_t::is_always_lock_free, "Expected to be lock free");
   static_assert(alignof(std::atomic<void*>) == alignof(void*), "Weird alignment");
   static_assert(sizeof(std::atomic<void*>) == sizeof(void*), "Weird size");
+  static_assert(std::atomic<void*>::is_always_lock_free, "Expected to be lock free");
 
   // GDB may set breakpoint here. We must ensure it is not removed or deduplicated.
   void __attribute__((noinline)) __jit_debug_register_code() {
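The g_art_sizeof_jit_code_entry and g_art_sizeof_jit_descriptor globals exported above let an out-of-process reader verify that its notion of the struct layout matches the target process. A possible reader-side check is sketched below; it is illustrative only, and the mirror structs plus the choice to tolerate larger remote sizes are assumptions of this example, not part of the patch.

#include <cstdint>

// Reader-side mirrors of the public structs, assuming a 64-bit target.
struct JITCodeEntryMirror {
  uint64_t next_;
  uint64_t prev_;
  uint64_t symfile_addr_;
  uint64_t symfile_size_;
  uint32_t seqlock_;
};

struct JITDescriptorMirror {
  uint32_t version_;
  uint32_t action_flag_;
  uint64_t relevant_entry_;
  uint64_t head_;
};

// 'entry_size' and 'descriptor_size' are the 4-byte values read from the target process
// at the addresses of g_art_sizeof_jit_code_entry and g_art_sizeof_jit_descriptor.
// Future ART versions may append fields, so only smaller-than-expected sizes are rejected.
bool LayoutLooksCompatible(uint32_t entry_size, uint32_t descriptor_size) {
  return entry_size >= sizeof(JITCodeEntryMirror) &&
         descriptor_size >= sizeof(JITDescriptorMirror);
}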
@@ -176,7 +176,12 @@
   static const void* Alloc(size_t size) { return Memory()->AllocateData(size); }
   static void Free(const void* ptr) { Memory()->FreeData(reinterpret_cast<const uint8_t*>(ptr)); }
   static void Free(void* ptr) = delete;
+
   template<class T> static T* Writable(const T* v) {
+    // Special case: This entry is in static memory and not allocated in JIT memory.
+    if (v == reinterpret_cast<const void*>(&Descriptor().application_tail_entry_)) {
+      return const_cast<T*>(v);
+    }
     return const_cast<T*>(Memory()->GetWritableDataAddress(v));
   }
 
@@ -194,34 +199,29 @@
   return ArrayRef<const uint8_t>(entry->symfile_addr_, entry->symfile_size_);
 }
 
-// Mark the descriptor as "locked", so native tools know the data is being modified.
-static void ActionSeqlock(JITDescriptor& descriptor) {
-  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 0u) << "Already locked";
-  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
-  // Ensure that any writes within the locked section cannot be reordered before the increment.
-  std::atomic_thread_fence(std::memory_order_release);
-}
-
-// Mark the descriptor as "unlocked", so native tools know the data is safe to read.
-static void ActionSequnlock(JITDescriptor& descriptor) {
-  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 1u) << "Already unlocked";
-  // Ensure that any writes within the locked section cannot be reordered after the increment.
-  std::atomic_thread_fence(std::memory_order_release);
-  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
-}
-
+// This must be called with the appropriate lock taken (g_{jit,dex}_debug_lock).
 template<class NativeInfo>
 static const JITCodeEntry* CreateJITCodeEntryInternal(
-    ArrayRef<const uint8_t> symfile,
+    ArrayRef<const uint8_t> symfile = ArrayRef<const uint8_t>(),
     const void* addr = nullptr,
     bool allow_packing = false,
     bool is_compressed = false) {
   JITDescriptor& descriptor = NativeInfo::Descriptor();
 
+  // Allocate JITCodeEntry if needed.
+  if (descriptor.free_entries_ == nullptr) {
+    const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
+    if (memory == nullptr) {
+      LOG(ERROR) << "Failed to allocate memory for native debug info";
+      return nullptr;
+    }
+    new (NativeInfo::Writable(memory)) JITCodeEntry();
+    descriptor.free_entries_ = reinterpret_cast<const JITCodeEntry*>(memory);
+  }
+
   // Make a copy of the buffer to shrink it and to pass ownership to JITCodeEntry.
-  const uint8_t* copy = nullptr;
-  if (NativeInfo::kCopySymfileData) {
-    copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
+  if (NativeInfo::kCopySymfileData && !symfile.empty()) {
+    const uint8_t* copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
     if (copy == nullptr) {
       LOG(ERROR) << "Failed to allocate memory for native debug info";
       return nullptr;
@@ -230,41 +230,38 @@
     symfile = ArrayRef<const uint8_t>(copy, symfile.size());
   }
 
-  // Ensure the timestamp is monotonically increasing even in presence of low
-  // granularity system timer.  This ensures each entry has unique timestamp.
-  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
-  const JITCodeEntry* head = descriptor.head_.load(std::memory_order_relaxed);
-  const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
-  if (memory == nullptr) {
-    LOG(ERROR) << "Failed to allocate memory for native debug info";
-    if (copy != nullptr) {
-      NativeInfo::Free(copy);
-    }
-    return nullptr;
+  // The zygote must insert entries at a specific place.  See NativeDebugInfoPreFork().
+  std::atomic<const JITCodeEntry*>* head = &descriptor.head_;
+  const JITCodeEntry* prev = nullptr;
+  if (Runtime::Current()->IsZygote() && descriptor.zygote_head_entry_ != nullptr) {
+    head = &NativeInfo::Writable(descriptor.zygote_head_entry_)->next_;
+    prev = descriptor.zygote_head_entry_;
   }
-  const JITCodeEntry* entry = reinterpret_cast<const JITCodeEntry*>(memory);
+  const JITCodeEntry* next = head->load(kNonRacingRelaxed);
+
+  // Pop entry from the free list.
+  const JITCodeEntry* entry = descriptor.free_entries_;
+  descriptor.free_entries_ = descriptor.free_entries_->next_.load(kNonRacingRelaxed);
+  CHECK_EQ(entry->seqlock_.load(kNonRacingRelaxed) & 1, 1u) << "Expected invalid entry";
+
+  // Create the entry and set all its fields.
   JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+  writable_entry->next_.store(next, std::memory_order_relaxed);
+  writable_entry->prev_ = prev;
   writable_entry->symfile_addr_ = symfile.data();
   writable_entry->symfile_size_ = symfile.size();
-  writable_entry->prev_ = nullptr;
-  writable_entry->next_.store(head, std::memory_order_relaxed);
-  writable_entry->register_timestamp_ = timestamp;
   writable_entry->addr_ = addr;
   writable_entry->allow_packing_ = allow_packing;
   writable_entry->is_compressed_ = is_compressed;
+  writable_entry->seqlock_.fetch_add(1, std::memory_order_release);  // Mark as valid.
 
-  // We are going to modify the linked list, so take the seqlock.
-  ActionSeqlock(descriptor);
-  if (head != nullptr) {
-    NativeInfo::Writable(head)->prev_ = entry;
+  // Add the entry to the main linked list.
+  if (next != nullptr) {
+    NativeInfo::Writable(next)->prev_ = entry;
   }
-  descriptor.head_.store(entry, std::memory_order_relaxed);
+  head->store(entry, std::memory_order_release);
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_REGISTER_FN;
-  descriptor.action_timestamp_ = timestamp;
-  ActionSequnlock(descriptor);
-
   NativeInfo::NotifyNativeDebugger();
 
   return entry;
@@ -276,13 +273,8 @@
   const uint8_t* symfile = entry->symfile_addr_;
   JITDescriptor& descriptor = NativeInfo::Descriptor();
 
-  // Ensure the timestamp is monotonically increasing even in presence of low
-  // granularity system timer.  This ensures each entry has unique timestamp.
-  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
-  // We are going to modify the linked list, so take the seqlock.
-  ActionSeqlock(descriptor);
-  const JITCodeEntry* next = entry->next_.load(std::memory_order_relaxed);
+  // Remove the entry from the main linked list.
+  const JITCodeEntry* next = entry->next_.load(kNonRacingRelaxed);
   if (entry->prev_ != nullptr) {
     NativeInfo::Writable(entry->prev_)->next_.store(next, std::memory_order_relaxed);
   } else {
@@ -293,21 +285,22 @@
   }
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  descriptor.action_timestamp_ = timestamp;
-  ActionSequnlock(descriptor);
-
   NativeInfo::NotifyNativeDebugger();
 
-  // Ensure that clear below can not be reordered above the unlock above.
+  // Delete the entry.
+  JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+  CHECK_EQ(writable_entry->seqlock_.load(kNonRacingRelaxed) & 1, 0u) << "Expected valid entry";
+  // Release: Ensures that "next_" points to a valid entry at all times, as seen by a reader.
+  writable_entry->seqlock_.fetch_add(1, std::memory_order_release);  // Mark as invalid.
+  // Release: Ensures that the entry is seen as invalid before its data is freed.
   std::atomic_thread_fence(std::memory_order_release);
-
-  // Aggressively clear the entry as an extra check of the synchronisation.
-  memset(NativeInfo::Writable(entry), 0, sizeof(*entry));
-
-  NativeInfo::Free(entry);
-  if (NativeInfo::kCopySymfileData) {
+  if (NativeInfo::kCopySymfileData && symfile != nullptr) {
     NativeInfo::Free(symfile);
   }
+
+  // Push the entry to the free list.
+  writable_entry->next_.store(descriptor.free_entries_, kNonRacingRelaxed);
+  descriptor.free_entries_ = entry;
 }
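The create and delete functions above split the writer-side protocol across two call sites. The sketch below condenses that lifecycle into one place so the seqlock transitions and the release ordering are easier to follow. It is a simplified restatement for illustration (single list, no free-list reuse shown, the retired entry assumed to be at the head), not a replacement for the code in this patch.

#include <atomic>
#include <cstdint>

struct Entry {
  std::atomic<const Entry*> next_{nullptr};
  const Entry* prev_ = nullptr;
  const uint8_t* symfile_addr_ = nullptr;
  uint64_t symfile_size_ = 0;
  std::atomic<uint32_t> seqlock_{1};  // Odd: invalid. Even: valid.
};

std::atomic<const Entry*> g_head{nullptr};

// Publish: all fields are written while the entry is still odd (unreachable),
// then the seqlock is made even with release, then the entry is linked in.
void Publish(Entry* e, const uint8_t* symfile, uint64_t size) {
  e->next_.store(g_head.load(std::memory_order_relaxed), std::memory_order_relaxed);
  e->prev_ = nullptr;
  e->symfile_addr_ = symfile;
  e->symfile_size_ = size;
  e->seqlock_.fetch_add(1, std::memory_order_release);  // Odd -> even: now valid.
  g_head.store(e, std::memory_order_release);           // Only valid entries become reachable.
}

// Retire: unlink first (readers must never reach an odd entry through head/next_),
// then flip the seqlock to odd with release before the symfile memory is reused.
void Retire(Entry* e) {
  const Entry* next = e->next_.load(std::memory_order_relaxed);
  g_head.store(next, std::memory_order_relaxed);         // Simplified: assumes e is the head.
  e->seqlock_.fetch_add(1, std::memory_order_release);   // Even -> odd: now invalid.
  std::atomic_thread_fence(std::memory_order_release);
  // Only now may the symfile be freed and the entry pushed onto a free list.
}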
 
 void AddNativeDebugInfoForDex(Thread* self, const DexFile* dexfile) {
@@ -332,6 +325,48 @@
   }
 }
 
+// Splits the linked list into two parts:
+// The first part (including the static head pointer) is owned by the application.
+// The second part is owned by the zygote and might be concurrently modified by it.
+//
+// We add two empty entries at the boundary which are never removed (app_tail, zygote_head).
+// These entries are needed to preserve the next/prev pointers in the linked list,
+// since the zygote cannot modify the application's data and vice versa.
+//
+//          <--- owned by the application memory ---> <--- owned by zygote memory --->
+//         |----------------------|------------------|-------------|-----------------|
+// head -> | application_entries* | application_tail | zygote_head | zygote_entries* |
+//         |----------------------|------------------|-------------|-----------------|
+//
+void NativeDebugInfoPreFork() {
+  CHECK(Runtime::Current()->IsZygote());
+  JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+  if (descriptor.zygote_head_entry_ != nullptr) {
+    return;  // Already done; this is needed only on the first fork.
+  }
+
+  // Create the zygote-owned head entry (with no ELF file).
+  // The data will be allocated from the current JIT memory (owned by zygote).
+  MutexLock mu(Thread::Current(), *Locks::jit_lock_);  // Needed to alloc entry.
+  const JITCodeEntry* zygote_head = CreateJITCodeEntryInternal<JitNativeInfo>();
+  CHECK(zygote_head != nullptr);
+  descriptor.zygote_head_entry_ = zygote_head;
+
+  // Create the child-owned tail entry (with no ELF file).
+  // The data is statically allocated since it must be owned by the forked process.
+  JITCodeEntry* app_tail = &descriptor.application_tail_entry_;
+  app_tail->next_ = zygote_head;
+  app_tail->seqlock_.store(2, kNonRacingRelaxed);  // Mark as valid.
+  descriptor.head_.store(app_tail, std::memory_order_release);
+}
+
+void NativeDebugInfoPostFork() {
+  JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+  if (!Runtime::Current()->IsZygote()) {
+    descriptor.free_entries_ = nullptr;  // Don't reuse zygote's entries.
+  }
+}
+
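The comment and diagram above fix the shape of the list after the first fork. The helper below is an illustrative, test-style consistency check written against the names defined in this file; it is not part of this change and is not called anywhere, but it spells out the invariants that the two boundary entries maintain.

// Illustrative only: verify the invariants of the split list. Caller holds g_jit_debug_lock.
static void CheckZygoteSplitInvariants() {
  JITDescriptor& descriptor = __jit_debug_descriptor;
  if (descriptor.zygote_head_entry_ == nullptr) {
    return;  // NativeDebugInfoPreFork() has not run yet, so the list is not split.
  }
  // Both boundary entries carry no ELF file and stay permanently valid (even seqlock).
  CHECK_EQ(descriptor.zygote_head_entry_->seqlock_.load(kNonRacingRelaxed) & 1, 0u);
  CHECK_EQ(descriptor.application_tail_entry_.seqlock_.load(kNonRacingRelaxed) & 1, 0u);
  CHECK(descriptor.zygote_head_entry_->symfile_addr_ == nullptr);
  // The application-owned tail always chains into the zygote-owned part of the list,
  // so an app never needs to write into zygote memory and vice versa.
  CHECK(descriptor.application_tail_entry_.next_.load(kNonRacingRelaxed) ==
        descriptor.zygote_head_entry_);
}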
 // Size of JIT code range covered by each packed JITCodeEntry.
 static constexpr uint32_t kJitRepackGroupSize = 64 * KB;
 
@@ -349,11 +384,16 @@
   if (jit == nullptr) {
     return;
   }
+  JITDescriptor& descriptor = __jit_debug_descriptor;
+  bool is_zygote = Runtime::Current()->IsZygote();
 
   // Collect entries that we want to pack.
   std::vector<const JITCodeEntry*> entries;
   entries.reserve(2 * kJitRepackFrequency);
-  for (const JITCodeEntry* it = __jit_debug_descriptor.head_; it != nullptr; it = it->next_) {
+  for (const JITCodeEntry* it = descriptor.head_; it != nullptr; it = it->next_) {
+    if (it == descriptor.zygote_head_entry_ && !is_zygote) {
+      break;  // Memory owned by the zygote process (read-only for an app).
+    }
     if (it->allow_packing_) {
       if (!compress && it->is_compressed_ && removed.empty()) {
         continue;  // If we are not compressing, also avoid decompressing.
@@ -420,10 +460,6 @@
   MutexLock mu(Thread::Current(), g_jit_debug_lock);
   DCHECK_NE(symfile.size(), 0u);
 
-  if (Runtime::Current()->IsZygote()) {
-    return;  // TODO: Implement memory sharing with the zygote process.
-  }
-
   CreateJITCodeEntryInternal<JitNativeInfo>(ArrayRef<const uint8_t>(symfile),
                                             /*addr=*/ code_ptr,
                                             /*allow_packing=*/ allow_packing,
@@ -437,8 +473,10 @@
   // Automatically repack entries on regular basis to save space.
   // Pack (but don't compress) recent entries - this is cheap and reduces memory use by ~4x.
   // We delay compression until after GC since it is more expensive (and saves further ~4x).
+  // Always compress in the zygote, since it does not GC and we want to keep its high-water mark low.
   if (++g_jit_num_unpacked_entries >= kJitRepackFrequency) {
-    RepackEntries(/*compress=*/ false, /*removed=*/ ArrayRef<const void*>());
+    bool is_zygote = Runtime::Current()->IsZygote();
+    RepackEntries(/*compress=*/ is_zygote, /*removed=*/ ArrayRef<const void*>());
   }
 }
 
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index 0bb3236..477d58c 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -32,6 +32,13 @@
 class Thread;
 struct JITCodeEntry;
 
+// Must be called before zygote forks.
+// Used to ensure that zygote's mini-debug-info can be shared with apps.
+void NativeDebugInfoPreFork();
+
+// Must be called after zygote forks.
+void NativeDebugInfoPostFork();
+
 ArrayRef<const uint8_t> GetJITCodeEntrySymFile(const JITCodeEntry*);
 
 // Notify native tools (e.g. libunwind) that DEX file has been opened.
@@ -54,7 +61,7 @@
     REQUIRES_SHARED(Locks::jit_lock_);  // Might need JIT code cache to allocate memory.
 
 // Returns approximate memory used by debug info for JIT code.
-size_t GetJitMiniDebugInfoMemUsage();
+size_t GetJitMiniDebugInfoMemUsage() REQUIRES_SHARED(Locks::jit_lock_);
 
 // Get the lock which protects the native debug info.
 // Used only in tests to unwind while the JIT thread is running.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index fd714a8..c9b458f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -1222,6 +1222,8 @@
     return;
   }
   thread_pool_->DeleteThreads();
+
+  NativeDebugInfoPreFork();
 }
 
 void Jit::PostZygoteFork() {
@@ -1229,6 +1231,8 @@
     return;
   }
   thread_pool_->CreateThreads();
+
+  NativeDebugInfoPostFork();
 }
 
 void Jit::BootCompleted() {