mm: protect VMA modifications using VMA sequence count
The VMA sequence count has been introduced to allow fast detection of
VMA modification when running a page fault handler without holding
the mmap_sem.
This patch provides protection against the VMA modifications done in:
- madvise()
- mpol_rebind_policy()
- vma_replace_policy()
- change_prot_numa()
- mlock(), munlock()
- mprotect()
- mmap_region()
- collapse_huge_page()
- userfaultfd registering services
In addition, VMA fields which will be read during the speculative fault
path need to be written using WRITE_ONCE() to prevent the writes from
being split and intermediate values from being pushed to other CPUs.
Change-Id: I806e51cb62279f8f0b5c37ed886b5a90bd8ad49b
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Patch-mainline: linux-mm @ Tue, 17 Apr 2018 16:33:15
[vinmenon@codeaurora.org: 4.9 porting changes + checkpatch fixes]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
diff --git a/mm/mmap.c b/mm/mmap.c
index b6a3f47..c151b7d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -841,17 +841,18 @@
}
if (start != vma->vm_start) {
- vma->vm_start = start;
+ WRITE_ONCE(vma->vm_start, start);
start_changed = true;
}
if (end != vma->vm_end) {
- vma->vm_end = end;
+ WRITE_ONCE(vma->vm_end, end);
end_changed = true;
}
- vma->vm_pgoff = pgoff;
+ WRITE_ONCE(vma->vm_pgoff, pgoff);
if (adjust_next) {
- next->vm_start += adjust_next << PAGE_SHIFT;
- next->vm_pgoff += adjust_next;
+ WRITE_ONCE(next->vm_start,
+ next->vm_start + (adjust_next << PAGE_SHIFT));
+ WRITE_ONCE(next->vm_pgoff, next->vm_pgoff + adjust_next);
}
if (root) {
@@ -1756,13 +1757,15 @@
out:
perf_event_mmap(vma);
+ vm_write_begin(vma);
vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current->mm)))
mm->locked_vm += (len >> PAGE_SHIFT);
else
- vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ WRITE_ONCE(vma->vm_flags,
+ vma->vm_flags & VM_LOCKED_CLEAR_MASK);
}
if (file)
@@ -1775,9 +1778,10 @@
* then new mapped in-place (which must be aimed as
* a completely new data area).
*/
- vma->vm_flags |= VM_SOFTDIRTY;
+ WRITE_ONCE(vma->vm_flags, vma->vm_flags | VM_SOFTDIRTY);
vma_set_page_prot(vma);
+ vm_write_end(vma);
return addr;
@@ -2411,8 +2415,8 @@
mm->locked_vm += grow;
vm_stat_account(mm, vma->vm_flags, grow);
anon_vma_interval_tree_pre_update_vma(vma);
- vma->vm_start = address;
- vma->vm_pgoff -= grow;
+ WRITE_ONCE(vma->vm_start, address);
+ WRITE_ONCE(vma->vm_pgoff, vma->vm_pgoff - grow);
anon_vma_interval_tree_post_update_vma(vma);
vma_gap_update(vma);
spin_unlock(&mm->page_table_lock);