Revert^4 "ART: Dual mapping of JIT code pages"

This reverts commit 1bfce389e8aba4ca7276918f2f86e5b13c5b5840.

This change adds a fix for stale data in the JIT code cache when dual
mappings of the code cache are employed. With dual mappings, newly
compiled code is written into the non-executable mapping of the code
cache. An additional data cache flush is needed to ensure the new
code is written out to the point of unification in the memory
hierarchy.
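
For reference, a minimal standalone sketch of the scheme this fix
relies on (illustrative only, not the ART implementation; mem_fd,
capacity, and the use of __builtin___clear_cache in place of ART's
FlushDataCache()/FlushInstructionCache() helpers are assumptions):
the memfd-backed pages are mapped twice, new code is written through
the writable view, the data cache is flushed for that view so the
bytes reach the point of unification, and the instruction cache is
flushed for the executable view before the code is run.

  #include <sys/mman.h>
  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Sketch: write compiled code through the non-executable view of a
  // dual-mapped region and perform the required cache maintenance.
  // Error handling is omitted for brevity.
  const uint8_t* CommitCodeDualView(int mem_fd, size_t capacity,
                                    const uint8_t* code, size_t code_size) {
    // Two virtual views of the same memfd-backed physical pages.
    uint8_t* writable = static_cast<uint8_t*>(
        mmap(nullptr, capacity, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, 0));
    uint8_t* executable = static_cast<uint8_t*>(
        mmap(nullptr, capacity, PROT_READ | PROT_EXEC, MAP_SHARED, mem_fd, 0));

    // New code only ever goes through the writable (non-executable) view.
    memcpy(writable, code, code_size);

    // Flush dirty data-cache lines for the writable view so the new code
    // reaches the point of unification, then flush the instruction cache
    // for the executable view so stale instructions are not fetched.
    // __builtin___clear_cache flushes both caches for the given range.
    __builtin___clear_cache(reinterpret_cast<char*>(writable),
                            reinterpret_cast<char*>(writable) + code_size);
    __builtin___clear_cache(reinterpret_cast<char*>(executable),
                            reinterpret_cast<char*>(executable) + code_size);
    return executable;
  }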

Test: manual
Bug: 66095511
Bug: 116761923

Change-Id: Ib5fa83f22ee4fee1d0f80e16b974e1677c20af9b
diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc
index 1bf553d..92551f1 100644
--- a/libartbase/base/mem_map.cc
+++ b/libartbase/base/mem_map.cc
@@ -692,6 +692,24 @@
                           int tail_prot,
                           std::string* error_msg,
                           bool use_debug_name) {
+  return RemapAtEnd(new_end,
+                    tail_name,
+                    tail_prot,
+                    MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS,
+                    /* fd */ -1,
+                    /* offset */ 0,
+                    error_msg,
+                    use_debug_name);
+}
+
+MemMap MemMap::RemapAtEnd(uint8_t* new_end,
+                          const char* tail_name,
+                          int tail_prot,
+                          int flags,
+                          int fd,
+                          off_t offset,
+                          std::string* error_msg,
+                          bool use_debug_name) {
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
@@ -715,9 +733,6 @@
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
-  unique_fd fd;
-  int flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS;
-
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Note: Do not explicitly unmap the tail region, mmap() with MAP_FIXED automatically
   // removes old mappings for the overlapping region. This makes the operation atomic
@@ -726,13 +741,13 @@
                                                           tail_base_size,
                                                           tail_prot,
                                                           flags,
-                                                          fd.get(),
-                                                          0));
+                                                          fd,
+                                                          offset));
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
+    *error_msg = StringPrintf("map(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
                               "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags,
-                              fd.get());
+                              fd, static_cast<intmax_t>(offset));
     return Invalid();
   }
   // Update *this.
diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h
index 20eda32..309da27 100644
--- a/libartbase/base/mem_map.h
+++ b/libartbase/base/mem_map.h
@@ -261,6 +261,16 @@
                     std::string* error_msg,
                     bool use_debug_name = true);
 
+  // Unmap the tail of this mapping and remap it as a new, possibly file-backed, memory map.
+  MemMap RemapAtEnd(uint8_t* new_end,
+                    const char* tail_name,
+                    int tail_prot,
+                    int tail_flags,
+                    int fd,
+                    off_t offset,
+                    std::string* error_msg,
+                    bool use_debug_name = true);
+
   // Take ownership of pages at the beginning of the mapping. The mapping must be an
   // anonymous reservation mapping, owning entire pages. The `byte_count` must not
   // exceed the size of this reservation.
diff --git a/libartbase/base/mem_map_test.cc b/libartbase/base/mem_map_test.cc
index ab3d18f..bf143d4 100644
--- a/libartbase/base/mem_map_test.cc
+++ b/libartbase/base/mem_map_test.cc
@@ -455,6 +455,53 @@
 }
 #endif
 
+TEST_F(MemMapTest, RemapFileViewAtEnd) {
+  CommonInit();
+  std::string error_msg;
+  ScratchFile scratch_file;
+
+  // Create a scratch file 3 pages large.
+  constexpr size_t kMapSize = 3 * kPageSize;
+  std::unique_ptr<uint8_t[]> data(new uint8_t[kMapSize]());
+  memset(data.get(), 1, kPageSize);
+  memset(&data[0], 0x55, kPageSize);
+  memset(&data[kPageSize], 0x5a, kPageSize);
+  memset(&data[2 * kPageSize], 0xaa, kPageSize);
+  ASSERT_TRUE(scratch_file.GetFile()->WriteFully(&data[0], kMapSize));
+
+  MemMap map = MemMap::MapFile(/*byte_count*/kMapSize,
+                               PROT_READ,
+                               MAP_PRIVATE,
+                               scratch_file.GetFd(),
+                               /*start*/0,
+                               /*low_4gb*/true,
+                               scratch_file.GetFilename().c_str(),
+                               &error_msg);
+  ASSERT_TRUE(map.IsValid()) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_EQ(map.Size(), kMapSize);
+  ASSERT_LT(reinterpret_cast<uintptr_t>(map.BaseBegin()), 1ULL << 32);
+  ASSERT_EQ(data[0], *map.Begin());
+  ASSERT_EQ(data[kPageSize], *(map.Begin() + kPageSize));
+  ASSERT_EQ(data[2 * kPageSize], *(map.Begin() + 2 * kPageSize));
+
+  for (size_t offset = 2 * kPageSize; offset > 0; offset -= kPageSize) {
+    MemMap tail = map.RemapAtEnd(map.Begin() + offset,
+                                 "bad_offset_map",
+                                 PROT_READ,
+                                 MAP_PRIVATE | MAP_FIXED,
+                                 scratch_file.GetFd(),
+                                 offset,
+                                 &error_msg);
+    ASSERT_TRUE(tail.IsValid()) << error_msg;
+    ASSERT_TRUE(error_msg.empty());
+    ASSERT_EQ(offset, map.Size());
+    ASSERT_EQ(static_cast<size_t>(kPageSize), tail.Size());
+    ASSERT_EQ(tail.Begin(), map.Begin() + map.Size());
+    ASSERT_EQ(data[offset], *tail.Begin());
+  }
+}
+
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
   // Some MIPS32 hardware (namely the Creator Ci20 development board)
   // cannot allocate in the 2GB-4GB region.
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 33d228f..63cb6a4 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -18,12 +18,15 @@
 
 #include <sstream>
 
+#include "android-base/unique_fd.h"
+
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/histogram-inl.h"
 #include "base/logging.h"  // For VLOG.
 #include "base/membarrier.h"
+#include "base/memfd.h"
 #include "base/mem_map.h"
 #include "base/quasi_atomic.h"
 #include "base/stl_util.h"
@@ -52,16 +55,32 @@
 #include "thread-current-inl.h"
 #include "thread_list.h"
 
+using android::base::unique_fd;
+
 namespace art {
 namespace jit {
 
-static constexpr int kProtCode = PROT_READ | PROT_EXEC;
-static constexpr int kProtData = PROT_READ | PROT_WRITE;
-static constexpr int kProtProfile = PROT_READ;
-
 static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
 static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
 
+static constexpr int kProtR = PROT_READ;
+static constexpr int kProtRW = PROT_READ | PROT_WRITE;
+static constexpr int kProtRWX = PROT_READ | PROT_WRITE | PROT_EXEC;
+static constexpr int kProtRX = PROT_READ | PROT_EXEC;
+
+namespace {
+
+// Translate an address belonging to one memory map into an address in a second. This is useful
+// when there are two virtual memory ranges for the same physical memory range.
+template <typename T>
+T* TranslateAddress(T* src_ptr, const MemMap& src, const MemMap& dst) {
+  CHECK(src.HasAddress(src_ptr));
+  uint8_t* const raw_src_ptr = reinterpret_cast<uint8_t*>(src_ptr);
+  return reinterpret_cast<T*>(raw_src_ptr - src.Begin() + dst.Begin());
+}
+
+}  // namespace
+
 class JitCodeCache::JniStubKey {
  public:
   explicit JniStubKey(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -190,17 +209,41 @@
 
   // Register for membarrier expedited sync core if JIT will be generating code.
   if (!used_only_for_profile_data) {
-    art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore);
+    if (art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore) != 0) {
+      // MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ensures that CPU instruction pipelines are
+      // flushed and it's used when adding code to the JIT. The memory used by the new code may
+      // have just been released and, in theory, the old code could still be in a pipeline.
+      VLOG(jit) << "Kernel does not support membarrier sync-core";
+    }
   }
 
-  // Decide how we should map the code and data sections.
-  // If we use the code cache just for profiling we do not need to map the code section as
-  // executable.
-  // NOTE 1: this is yet another workaround to bypass strict SElinux policies in order to be able
-  //         to profile system server.
-  // NOTE 2: We could just not create the code section at all but we will need to
-  //         special case too many cases.
-  int memmap_flags_prot_code = used_only_for_profile_data ? kProtProfile : kProtCode;
+  // File descriptor enabling dual-view mapping of the code section.
+  unique_fd mem_fd;
+
+  // Bionic supports memfd_create, but the call may fail on older kernels.
+  mem_fd = unique_fd(art::memfd_create("/jit-cache", /* flags */ 0));
+  if (mem_fd.get() < 0) {
+    VLOG(jit) << "Failed to initialize dual view JIT. memfd_create() error: "
+              << strerror(errno);
+  }
+
+  if (mem_fd.get() >= 0 && ftruncate(mem_fd, max_capacity) != 0) {
+    std::ostringstream oss;
+    oss << "Failed to initialize memory file: " << strerror(errno);
+    *error_msg = oss.str();
+    return nullptr;
+  }
+
+  // Data cache will be half of the initial allocation.
+  // Code cache will be the other half of the initial allocation.
+  // TODO: Make this variable?
+
+  // Align both capacities to page size, as that's the unit mspaces use.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+  const size_t data_capacity = max_capacity / 2;
+  const size_t exec_capacity = used_only_for_profile_data ? 0 : max_capacity - data_capacity;
+  DCHECK_LE(data_capacity + exec_capacity, max_capacity);
 
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
@@ -208,71 +251,147 @@
   // We could do PC-relative addressing to avoid this problem, but that
   // would require reserving code and data area before submitting, which
   // means more windows for the code memory to be RWX.
-  MemMap data_map = MemMap::MapAnonymous(
-      "data-code-cache",
-      /* addr */ nullptr,
-      max_capacity,
-      kProtData,
-      /* low_4gb */ true,
-      /* reuse */ false,
-      /* reservation */ nullptr,
-      &error_str);
-  if (!data_map.IsValid()) {
+  int base_flags;
+  MemMap data_pages;
+  if (mem_fd.get() >= 0) {
+    // Dual view of JIT code cache case. Create an initial mapping of data pages large enough
+    // for data and non-writable view of JIT code pages. We use the memory file descriptor to
+    // enable dual mapping - we'll create a second mapping using the descriptor below. The
+    // mappings will look like:
+    //
+    //       VA                  PA
+    //
+    //       +---------------+
+    //       | non exec code |\
+    //       +---------------+ \
+    //       :               :\ \
+    //       +---------------+.\.+---------------+
+    //       |  exec code    |  \|     code      |
+    //       +---------------+...+---------------+
+    //       |      data     |   |     data      |
+    //       +---------------+...+---------------+
+    //
+    // In this configuration code updates are written to the non-executable view of the code
+    // cache, and the executable view of the code cache has fixed RX memory protections.
+    //
+    // This memory needs to be mapped shared as the code portions will have two mappings.
+    base_flags = MAP_SHARED;
+    data_pages = MemMap::MapFile(
+        data_capacity + exec_capacity,
+        kProtRW,
+        base_flags,
+        mem_fd,
+        /* start */ 0,
+        /* low_4gb */ true,
+        "data-code-cache",
+        &error_str);
+  } else {
+    // Single view of JIT code cache case. Create an initial mapping of data pages large enough
+    // for data and JIT code pages. The mappings will look like:
+    //
+    //       VA                  PA
+    //
+    //       +---------------+...+---------------+
+    //       |  exec code    |   |     code      |
+    //       +---------------+...+---------------+
+    //       |      data     |   |     data      |
+    //       +---------------+...+---------------+
+    //
+    // In this configuration code updates are written to the executable view of the code cache,
+    // and the executable view of the code cache transitions RX to RWX for the update and then
+    // back to RX after the update.
+    base_flags = MAP_PRIVATE | MAP_ANON;
+    data_pages = MemMap::MapAnonymous(
+        "data-code-cache",
+        /* addr */ nullptr,
+        data_capacity + exec_capacity,
+        kProtRW,
+        /* low_4gb */ true,
+        /* reuse */ false,
+        /* reservation */ nullptr,
+        &error_str);
+  }
+
+  if (!data_pages.IsValid()) {
     std::ostringstream oss;
     oss << "Failed to create read write cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
 
-  // Align both capacities to page size, as that's the unit mspaces use.
-  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
-  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+  MemMap exec_pages;
+  MemMap non_exec_pages;
+  if (exec_capacity > 0) {
+    uint8_t* const divider = data_pages.Begin() + data_capacity;
+    // Set initial permission for executable view to catch any SELinux permission problems early
+    // (for processes that cannot map WX pages). Otherwise, this region does not need to be
+    // executable as there is no code in the cache yet.
+    exec_pages = data_pages.RemapAtEnd(divider,
+                                       "jit-code-cache",
+                                       kProtRX,
+                                       base_flags | MAP_FIXED,
+                                       mem_fd.get(),
+                                       (mem_fd.get() >= 0) ? data_capacity : 0,
+                                       &error_str);
+    if (!exec_pages.IsValid()) {
+      std::ostringstream oss;
+      oss << "Failed to create read execute code cache: " << error_str << " size=" << max_capacity;
+      *error_msg = oss.str();
+      return nullptr;
+    }
 
-  // Data cache is 1 / 2 of the map.
-  // TODO: Make this variable?
-  size_t data_size = max_capacity / 2;
-  size_t code_size = max_capacity - data_size;
-  DCHECK_EQ(code_size + data_size, max_capacity);
-  uint8_t* divider = data_map.Begin() + data_size;
-
-  MemMap code_map = data_map.RemapAtEnd(
-      divider, "jit-code-cache", memmap_flags_prot_code | PROT_WRITE, &error_str);
-  if (!code_map.IsValid()) {
-    std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
-    *error_msg = oss.str();
-    return nullptr;
+    if (mem_fd.get() >= 0) {
+      // For dual view, create the secondary view of code memory used for updating code. This view
+      // is never executable.
+      non_exec_pages = MemMap::MapFile(exec_capacity,
+                                       kProtR,
+                                       base_flags,
+                                       mem_fd,
+                                       /* start */ data_capacity,
+                                       /* low_4GB */ false,
+                                       "jit-code-cache-rw",
+                                       &error_str);
+      if (!non_exec_pages.IsValid()) {
+        // Log and continue as single view JIT.
+        VLOG(jit) << "Failed to map non-executable view of JIT code cache";
+      }
+    }
+  } else {
+    // Profiling only. No memory for code required.
+    DCHECK(used_only_for_profile_data);
   }
-  DCHECK_EQ(code_map.Begin(), divider);
-  data_size = initial_capacity / 2;
-  code_size = initial_capacity - data_size;
-  DCHECK_EQ(code_size + data_size, initial_capacity);
+
+  const size_t initial_data_capacity = initial_capacity / 2;
+  const size_t initial_exec_capacity =
+      (exec_capacity == 0) ? 0 : (initial_capacity - initial_data_capacity);
+
   return new JitCodeCache(
-      std::move(code_map),
-      std::move(data_map),
-      code_size,
-      data_size,
+      std::move(data_pages),
+      std::move(exec_pages),
+      std::move(non_exec_pages),
+      initial_data_capacity,
+      initial_exec_capacity,
       max_capacity,
-      garbage_collect_code,
-      memmap_flags_prot_code);
+      garbage_collect_code);
 }
 
-JitCodeCache::JitCodeCache(MemMap&& code_map,
-                           MemMap&& data_map,
-                           size_t initial_code_capacity,
+JitCodeCache::JitCodeCache(MemMap&& data_pages,
+                           MemMap&& exec_pages,
+                           MemMap&& non_exec_pages,
                            size_t initial_data_capacity,
+                           size_t initial_exec_capacity,
                            size_t max_capacity,
-                           bool garbage_collect_code,
-                           int memmap_flags_prot_code)
+                           bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache condition variable", lock_),
       collection_in_progress_(false),
-      code_map_(std::move(code_map)),
-      data_map_(std::move(data_map)),
+      data_pages_(std::move(data_pages)),
+      exec_pages_(std::move(exec_pages)),
+      non_exec_pages_(std::move(non_exec_pages)),
       max_capacity_(max_capacity),
-      current_capacity_(initial_code_capacity + initial_data_capacity),
-      code_end_(initial_code_capacity),
+      current_capacity_(initial_exec_capacity + initial_data_capacity),
       data_end_(initial_data_capacity),
+      exec_end_(initial_exec_capacity),
       last_collection_increased_code_cache_(false),
       garbage_collect_code_(garbage_collect_code),
       used_memory_for_data_(0),
@@ -284,40 +403,46 @@
       histogram_code_memory_use_("Memory used for compiled code", 16),
       histogram_profiling_info_memory_use_("Memory used for profiling info", 16),
       is_weak_access_enabled_(true),
-      inline_cache_cond_("Jit inline cache condition variable", lock_),
-      memmap_flags_prot_code_(memmap_flags_prot_code) {
+      inline_cache_cond_("Jit inline cache condition variable", lock_) {
 
-  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
-  code_mspace_ = create_mspace_with_base(code_map_.Begin(), code_end_, false /*locked*/);
-  data_mspace_ = create_mspace_with_base(data_map_.Begin(), data_end_, false /*locked*/);
+  DCHECK_GE(max_capacity, initial_exec_capacity + initial_data_capacity);
 
-  if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
-    PLOG(FATAL) << "create_mspace_with_base failed";
+  // Initialize the data heap.
+  data_mspace_ = create_mspace_with_base(data_pages_.Begin(), data_end_, false /*locked*/);
+  CHECK(data_mspace_ != nullptr) << "create_mspace_with_base (data) failed";
+
+  // Initialize the code heap.
+  MemMap* code_heap = nullptr;
+  if (non_exec_pages_.IsValid()) {
+    code_heap = &non_exec_pages_;
+  } else if (exec_pages_.IsValid()) {
+    code_heap = &exec_pages_;
   }
-
-  SetFootprintLimit(current_capacity_);
-
-  CheckedCall(mprotect,
-              "mprotect jit code cache",
-              code_map_.Begin(),
-              code_map_.Size(),
-              memmap_flags_prot_code_);
-  CheckedCall(mprotect,
-              "mprotect jit data cache",
-              data_map_.Begin(),
-              data_map_.Size(),
-              kProtData);
+  if (code_heap != nullptr) {
+    // Make all pages reserved for the code heap writable. The mspace allocator that manages the
+    // heap will take and initialize pages in create_mspace_with_base().
+    CheckedCall(mprotect, "create code heap", code_heap->Begin(), code_heap->Size(), kProtRW);
+    exec_mspace_ = create_mspace_with_base(code_heap->Begin(), exec_end_, false /*locked*/);
+    CHECK(exec_mspace_ != nullptr) << "create_mspace_with_base (exec) failed";
+    SetFootprintLimit(current_capacity_);
+    // Protect pages containing heap metadata. Updates to the code heap toggle write permission
+    // only for the duration of an update; write access is not required at any other time.
+    CheckedCall(mprotect, "protect code heap", code_heap->Begin(), code_heap->Size(), kProtR);
+  } else {
+    exec_mspace_ = nullptr;
+    SetFootprintLimit(current_capacity_);
+  }
 
   VLOG(jit) << "Created jit code cache: initial data size="
             << PrettySize(initial_data_capacity)
             << ", initial code size="
-            << PrettySize(initial_code_capacity);
+            << PrettySize(initial_exec_capacity);
 }
 
 JitCodeCache::~JitCodeCache() {}
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
-  return code_map_.Begin() <= ptr && ptr < code_map_.End();
+  return exec_pages_.Begin() <= ptr && ptr < exec_pages_.End();
 }
 
 bool JitCodeCache::WillExecuteJitCode(ArtMethod* method) {
@@ -385,22 +510,20 @@
       : ScopedTrace("ScopedCodeCacheWrite"),
         code_cache_(code_cache) {
     ScopedTrace trace("mprotect all");
-    CheckedCall(
-        mprotect,
-        "make code writable",
-        code_cache_->code_map_.Begin(),
-        code_cache_->code_map_.Size(),
-        code_cache_->memmap_flags_prot_code_ | PROT_WRITE);
+    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping();
+    if (updatable_pages != nullptr) {
+      int prot = code_cache_->HasDualCodeMapping() ? kProtRW : kProtRWX;
+      CheckedCall(mprotect, "Cache +W", updatable_pages->Begin(), updatable_pages->Size(), prot);
+    }
   }
 
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CheckedCall(
-        mprotect,
-        "make code protected",
-        code_cache_->code_map_.Begin(),
-        code_cache_->code_map_.Size(),
-        code_cache_->memmap_flags_prot_code_);
+    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping();
+    if (updatable_pages != nullptr) {
+      int prot = code_cache_->HasDualCodeMapping() ? kProtR : kProtRX;
+      CheckedCall(mprotect, "Cache -W", updatable_pages->Begin(), updatable_pages->Size(), prot);
+    }
   }
 
  private:
@@ -602,7 +725,13 @@
   if (OatQuickMethodHeader::FromCodePointer(code_ptr)->IsOptimized()) {
     FreeData(GetRootTable(code_ptr));
   }  // else this is a JNI stub without any data.
-  FreeCode(reinterpret_cast<uint8_t*>(allocation));
+
+  uint8_t* code_allocation = reinterpret_cast<uint8_t*>(allocation);
+  if (HasDualCodeMapping()) {
+    code_allocation = TranslateAddress(code_allocation, exec_pages_, non_exec_pages_);
+  }
+
+  FreeCode(code_allocation);
 }
 
 void JitCodeCache::FreeAllMethodHeaders(
@@ -753,6 +882,16 @@
   }
 }
 
+const MemMap* JitCodeCache::GetUpdatableCodeMapping() const {
+  if (HasDualCodeMapping()) {
+    return &non_exec_pages_;
+  } else if (HasCodeMapping()) {
+    return &exec_pages_;
+  } else {
+    return nullptr;
+  }
+}
+
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -773,38 +912,73 @@
     DCheckRootsAreValid(roots);
   }
 
-  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
-  // Ensure the header ends up at expected instruction alignment.
-  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
-  size_t total_size = header_size + code_size;
-
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
-  uint8_t* memory = nullptr;
+
   MutexLock mu(self, lock_);
   // We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to
   // finish.
   WaitForPotentialCollectionToCompleteRunnable(self);
   {
     ScopedCodeCacheWrite scc(this);
-    memory = AllocateCode(total_size);
-    if (memory == nullptr) {
+
+    size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    // Ensure the header ends up at expected instruction alignment.
+    size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+    size_t total_size = header_size + code_size;
+
+    // AllocateCode allocates memory from the writable code region for the method header and the
+    // code itself. The header size may include alignment padding.
+    uint8_t* nox_memory = AllocateCode(total_size);
+    if (nox_memory == nullptr) {
       return nullptr;
     }
-    code_ptr = memory + header_size;
 
+    // code_ptr points into the writable (non-executable under dual view) mapping.
+    code_ptr = nox_memory + header_size;
     std::copy(code, code + code_size, code_ptr);
     method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+
+    // From here code_ptr points to executable code.
+    if (HasDualCodeMapping()) {
+      code_ptr = TranslateAddress(code_ptr, non_exec_pages_, exec_pages_);
+    }
+
     new (method_header) OatQuickMethodHeader(
         (stack_map != nullptr) ? code_ptr - stack_map : 0u,
         code_size);
-    // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
-    // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
-    // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
-    // 6P) stop being supported or their kernels are fixed.
+
+    DCHECK(!Runtime::Current()->IsAotCompiler());
+    if (has_should_deoptimize_flag) {
+      method_header->SetHasShouldDeoptimizeFlag();
+    }
+
+    // Update method_header pointer to executable code region.
+    if (HasDualCodeMapping()) {
+      method_header = TranslateAddress(method_header, non_exec_pages_, exec_pages_);
+    }
+
+    // Both instruction and data caches need flushing to the point of unification where both share
+    // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
+    // newly added code are written out to the point of unification. Flushing the instruction
+    // cache ensures the newly written code will be fetched from the point of unification before
+    // use. Memory in the code cache is recycled as code is added and removed. The flushes
+    // prevent stale code from residing in the instruction cache.
+    //
+    // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
+    // may trigger a segfault if a page fault occurs when requesting a cache maintenance
+    // operation. This is a kernel bug that we need to work around until affected devices
+    // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
     //
     // For reference, this behavior is caused by this commit:
     // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+    //
+    if (HasDualCodeMapping()) {
+      // Flush the data cache lines associated with the non-executable copy of the code just added.
+      FlushDataCache(nox_memory, nox_memory + total_size);
+    }
+    // FlushInstructionCache() flushes both data and instruction cache lines. The cacheline range
+    // flushed is for the executable mapping of the code just added.
     FlushInstructionCache(code_ptr, code_ptr + code_size);
 
     // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
@@ -813,16 +987,14 @@
     // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
     // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
     // hardware support that broadcasts TLB invalidations and so their kernels have no software
-    // based TLB shootdown.
+    // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
+    // address this (see membarrier(2)). The membarrier here will fail on prior kernels and on
+    // platforms lacking the appropriate support.
     art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
 
-    DCHECK(!Runtime::Current()->IsAotCompiler());
-    if (has_should_deoptimize_flag) {
-      method_header->SetHasShouldDeoptimizeFlag();
-    }
-
     number_of_compilations_++;
   }
+
   // We need to update the entry point in the runnable state for the instrumentation.
   {
     // The following needs to be guarded by cha_lock_ also. Otherwise it's possible that the
@@ -1167,9 +1339,9 @@
   DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
   DCHECK_EQ(per_space_footprint * 2, new_footprint);
   mspace_set_footprint_limit(data_mspace_, per_space_footprint);
-  {
+  if (HasCodeMapping()) {
     ScopedCodeCacheWrite scc(this);
-    mspace_set_footprint_limit(code_mspace_, per_space_footprint);
+    mspace_set_footprint_limit(exec_mspace_, per_space_footprint);
   }
 }
 
@@ -1244,8 +1416,8 @@
       number_of_collections_++;
       live_bitmap_.reset(CodeCacheBitmap::Create(
           "code-cache-bitmap",
-          reinterpret_cast<uintptr_t>(code_map_.Begin()),
-          reinterpret_cast<uintptr_t>(code_map_.Begin() + current_capacity_ / 2)));
+          reinterpret_cast<uintptr_t>(exec_pages_.Begin()),
+          reinterpret_cast<uintptr_t>(exec_pages_.Begin() + current_capacity_ / 2)));
       collection_in_progress_ = true;
     }
   }
@@ -1614,15 +1786,17 @@
 // NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
 // is already held.
 void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
-  if (code_mspace_ == mspace) {
-    size_t result = code_end_;
-    code_end_ += increment;
-    return reinterpret_cast<void*>(result + code_map_.Begin());
+  if (mspace == exec_mspace_) {
+    DCHECK(exec_mspace_ != nullptr);
+    const MemMap* const code_pages = GetUpdatableCodeMapping();
+    void* result = code_pages->Begin() + exec_end_;
+    exec_end_ += increment;
+    return result;
   } else {
     DCHECK_EQ(data_mspace_, mspace);
-    size_t result = data_end_;
+    void* result = data_pages_.Begin() + data_end_;
     data_end_ += increment;
-    return reinterpret_cast<void*>(result + data_map_.Begin());
+    return result;
   }
 }
 
@@ -1849,7 +2023,7 @@
 uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   uint8_t* result = reinterpret_cast<uint8_t*>(
-      mspace_memalign(code_mspace_, alignment, code_size));
+      mspace_memalign(exec_mspace_, alignment, code_size));
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   // Ensure the header ends up at expected instruction alignment.
   DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
@@ -1859,7 +2033,7 @@
 
 void JitCodeCache::FreeCode(uint8_t* code) {
   used_memory_for_code_ -= mspace_usable_size(code);
-  mspace_free(code_mspace_, code);
+  mspace_free(exec_mspace_, code);
 }
 
 uint8_t* JitCodeCache::AllocateData(size_t data_size) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e2aa01c..76ad8db 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -223,7 +223,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
-    return mspace == code_mspace_ || mspace == data_mspace_;
+    return mspace == data_mspace_ || mspace == exec_mspace_;
   }
 
   void* MoreCore(const void* mspace, intptr_t increment);
@@ -279,13 +279,13 @@
 
  private:
   // Take ownership of maps.
-  JitCodeCache(MemMap&& code_map,
-               MemMap&& data_map,
-               size_t initial_code_capacity,
+  JitCodeCache(MemMap&& data_pages,
+               MemMap&& exec_pages,
+               MemMap&& non_exec_pages,
                size_t initial_data_capacity,
+               size_t initial_exec_capacity,
                size_t max_capacity,
-               bool garbage_collect_code,
-               int memmap_flags_prot_code);
+               bool garbage_collect_code);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -381,6 +381,16 @@
   uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
   void FreeData(uint8_t* data) REQUIRES(lock_);
 
+  bool HasDualCodeMapping() const {
+    return non_exec_pages_.IsValid();
+  }
+
+  bool HasCodeMapping() const {
+    return exec_pages_.IsValid();
+  }
+
+  const MemMap* GetUpdatableCodeMapping() const;
+
   bool IsWeakAccessEnabled(Thread* self) const;
   void WaitUntilInlineCacheAccessible(Thread* self)
       REQUIRES(!lock_)
@@ -395,14 +405,17 @@
   ConditionVariable lock_cond_ GUARDED_BY(lock_);
   // Whether there is a code cache collection in progress.
   bool collection_in_progress_ GUARDED_BY(lock_);
-  // Mem map which holds code.
-  MemMap code_map_;
   // Mem map which holds data (stack maps and profiling info).
-  MemMap data_map_;
-  // The opaque mspace for allocating code.
-  void* code_mspace_ GUARDED_BY(lock_);
+  MemMap data_pages_;
+  // Mem map which holds code and has executable permission.
+  MemMap exec_pages_;
+  // Mem map which holds code with non-executable permission. Only valid for dual-view JIT, where
+  // it is the non-executable view of the code used to write updates.
+  MemMap non_exec_pages_;
   // The opaque mspace for allocating data.
   void* data_mspace_ GUARDED_BY(lock_);
+  // The opaque mspace for allocating code.
+  void* exec_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
   // Holds compiled code associated with the shorty for a JNI stub.
@@ -420,12 +433,12 @@
   // The current capacity in bytes of the code cache.
   size_t current_capacity_ GUARDED_BY(lock_);
 
-  // The current footprint in bytes of the code portion of the code cache.
-  size_t code_end_ GUARDED_BY(lock_);
-
   // The current footprint in bytes of the data portion of the code cache.
   size_t data_end_ GUARDED_BY(lock_);
 
+  // The current footprint in bytes of the code portion of the code cache.
+  size_t exec_end_ GUARDED_BY(lock_);
+
   // Whether the last collection round increased the code cache.
   bool last_collection_increased_code_cache_ GUARDED_BY(lock_);
 
@@ -464,9 +477,6 @@
   // Condition to wait on for accessing inline caches.
   ConditionVariable inline_cache_cond_ GUARDED_BY(lock_);
 
-  // Mapping flags for the code section.
-  const int memmap_flags_prot_code_;
-
   friend class art::JitJniStubTestHelper;
   friend class ScopedCodeCacheWrite;