mm, thp: Do not make pmd/pud dirty without a reason

Currently we mark huge page table entries dirty unconditionally, regardless
of the access type, without even considering whether the mapping is
write-protected. The reasoning has been that we don't really need dirty
tracking for THP, and that making the entry dirty upfront may save some
time on the first write to the page.

Unfortunately, such an approach can produce a false positive in
can_follow_write_pmd() for the huge zero page or a read-only shmem file.
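
For reference, the check in question looks roughly like this as of this
kernel (mm/huge_memory.c): FOLL_FORCE may write through a write-protected
pmd only after a COW cycle, and that cycle is inferred from the dirty bit,
so an entry that was dirtied upfront on a read fault passes the check
without any write ever having happened:

	/*
	 * Sketch of the existing check: FOLL_FORCE can write to even
	 * unwritable pmd's, but only after a COW cycle, which is
	 * detected via the dirty bit.  Unconditional dirtying makes
	 * this test pass spuriously.
	 */
	static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
	{
		return pmd_write(pmd) ||
		       ((flags & FOLL_FORCE) && (flags & FOLL_COW) &&
			pmd_dirty(pmd));
	}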

Let's make the page dirty only if we are about to write to it anyway
(as we already do for small pages).
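
For comparison, the small-page fault path (e.g. do_anonymous_page()) only
dirties the pte together with making it writable, roughly:

	/* the pte becomes dirty only when it also becomes writable */
	entry = mk_pte(page, vma->vm_page_prot);
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));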

I've restructured the code to set the dirty bit inside
maybe_p[mu]d_mkwrite(), which also takes into account whether the vma is
write-protected.
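
After this change the callers express the intent directly: a write fault
asks for a dirty entry, while collapse and migration leave the entry
clean, e.g.:

	/* write fault: make the entry writable and dirty in one step */
	entry = maybe_pmd_mkwrite(entry, vma, true);

	/* khugepaged collapse / migration: writable if allowed, but clean */
	entry = maybe_pmd_mkwrite(entry, vma, false);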

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0e7ded9..f22401f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -474,10 +474,13 @@
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma, bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pmd = pmd_mkwrite(pmd);
+		if (dirty)
+			pmd = pmd_mkdirty(pmd);
+	}
 	return pmd;
 }
 
@@ -599,7 +602,7 @@
 		}
 
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		page_add_new_anon_rmap(page, vma, haddr, true);
 		mem_cgroup_commit_charge(page, memcg, false, true);
 		lru_cache_add_active_or_unevictable(page, vma);
@@ -741,8 +744,8 @@
 	if (pfn_t_devmap(pfn))
 		entry = pmd_mkdevmap(entry);
 	if (write) {
-		entry = pmd_mkyoung(pmd_mkdirty(entry));
-		entry = maybe_pmd_mkwrite(entry, vma);
+		entry = pmd_mkyoung(entry);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 	}
 
 	if (pgtable) {
@@ -788,10 +791,14 @@
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
+static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma,
+		bool dirty)
 {
-	if (likely(vma->vm_flags & VM_WRITE))
+	if (likely(vma->vm_flags & VM_WRITE)) {
 		pud = pud_mkwrite(pud);
+		if (dirty)
+			pud = pud_mkdirty(pud);
+	}
 	return pud;
 }
 
@@ -807,8 +814,8 @@
 	if (pfn_t_devmap(pfn))
 		entry = pud_mkdevmap(entry);
 	if (write) {
-		entry = pud_mkyoung(pud_mkdirty(entry));
-		entry = maybe_pud_mkwrite(entry, vma);
+		entry = pud_mkyoung(entry);
+		entry = maybe_pud_mkwrite(entry, vma, true);
 	}
 	set_pud_at(mm, addr, pud, entry);
 	update_mmu_cache_pud(vma, addr, pud);
@@ -1279,7 +1286,7 @@
 	if (reuse_swap_page(page, NULL)) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
 			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		ret |= VM_FAULT_WRITE;
@@ -1349,7 +1356,7 @@
 	} else {
 		pmd_t entry;
 		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 		pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr, true);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
@@ -2928,7 +2935,7 @@
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_write_migration_entry(entry))
-		pmde = maybe_pmd_mkwrite(pmde, vma);
+		pmde = maybe_pmd_mkwrite(pmde, vma, false);
 
 	flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE);
 	page_add_anon_rmap(new, vma, mmun_start, true);
diff --git a/mm/internal.h b/mm/internal.h
index e6bd351..b35cdeb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -328,7 +328,8 @@
 	}
 }
 
-extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma,
+		bool dirty);
 
 /*
  * At what user virtual address is page expected in @vma?
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ea4ff25..db43dc8 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1057,7 +1057,7 @@
 	pgtable = pmd_pgtable(_pmd);
 
 	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
-	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+	_pmd = maybe_pmd_mkwrite(_pmd, vma, false);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so
diff --git a/mm/memory.c b/mm/memory.c
index 85e7a87..b10c1d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3335,7 +3335,7 @@
 
 	entry = mk_huge_pmd(page, vma->vm_page_prot);
 	if (write)
-		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+		entry = maybe_pmd_mkwrite(entry, vma, true);
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
diff --git a/mm/migrate.c b/mm/migrate.c
index 4d0be47..57865fc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2068,7 +2068,7 @@
 	}
 
 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = maybe_pmd_mkwrite(entry, vma, false);
 
 	/*
 	 * Clear the old entry under pagetable lock and establish the new PTE.