page cache: use xa_lock

Remove the address_space ->tree_lock and use the xa_lock newly added to
the radix_tree_root.  Rename the address_space ->page_tree to ->i_pages,
since we don't really care that it's a tree.
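
The call-site conversion is mechanical.  A minimal before/after sketch
(page deletion chosen only as an illustration, not lifted verbatim from
this patch):

    /* before: the tree and the lock protecting it are separate fields */
    spin_lock_irq(&mapping->tree_lock);
    radix_tree_delete(&mapping->page_tree, page->index);
    spin_unlock_irq(&mapping->tree_lock);

    /* after: the lock is embedded in the root as i_pages.xa_lock */
    xa_lock_irq(&mapping->i_pages);
    radix_tree_delete(&mapping->i_pages, page->index);
    xa_unlock_irq(&mapping->i_pages);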

[willy@infradead.org: fix nds32, fs/dax.c]
  Link: http://lkml.kernel.org/r/20180406145415.GB20605@bombadil.infradead.org
Link: http://lkml.kernel.org/r/20180313132639.17387-9-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e6cbb91..09da0f1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
  * @inode: inode of interest
  *
  * Returns the wb @inode is currently associated with.  The caller must be
- * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
+ * holding either @inode->i_lock, the i_pages lock, or the
  * associated wb's list_lock.
  */
 static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
@@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 #ifdef CONFIG_LOCKDEP
 	WARN_ON_ONCE(debug_locks &&
 		     (!lockdep_is_held(&inode->i_lock) &&
-		      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
+		      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
 		      !lockdep_is_held(&inode->i_wb->list_lock)));
 #endif
 	return inode->i_wb;
@@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * @lockedp: temp bool output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
- * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
+ * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
  * function determines the wb associated with @inode and ensures that the
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
@@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
 	if (unlikely(*lockedp))
-		spin_lock_irq(&inode->i_mapping->tree_lock);
+		xa_lock_irq(&inode->i_mapping->i_pages);
 
 	/*
-	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
-	 * inode_to_wb() will bark.  Deref directly.
+	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
+	 * lock.  inode_to_wb() will bark.  Deref directly.
 	 */
 	return inode->i_wb;
 }
@@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 {
 	if (unlikely(locked))
-		spin_unlock_irq(&inode->i_mapping->tree_lock);
+		xa_unlock_irq(&inode->i_mapping->i_pages);
 
 	rcu_read_unlock();
 }
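
For context, a caller consumes the begin/end pair above roughly as
follows (a sketch based on the signatures in this hunk; the helper name
and the stat update are illustrative stand-ins only, assuming
<linux/backing-dev.h> is included):

    static void example_wb_account(struct inode *inode)
    {
        struct bdi_writeback *wb;
        bool locked;

        wb = unlocked_inode_to_wb_begin(inode, &locked);
        /*
         * The inode->wb association is stable until _end().  If a cgroup
         * writeback switch was in flight, @locked is true and the i_pages
         * lock was taken for us; otherwise we run under RCU only.
         */
        inc_wb_stat(wb, WB_DIRTIED);    /* stand-in for real per-wb work */
        unlocked_inode_to_wb_end(inode, locked);
    }
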
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2aa02ca..92efaf1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -13,6 +13,7 @@
 #include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
@@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
-	struct radix_tree_root	page_tree;	/* radix tree of all pages */
-	spinlock_t		tree_lock;	/* and lock protecting it */
+	struct radix_tree_root	i_pages;	/* cached pages */
 	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root_cached	i_mmap;		/* tree of private and shared mappings */
 	struct rw_semaphore	i_mmap_rwsem;	/* protect tree, count, list */
-	/* Protected by tree_lock together with the radix tree */
+	/* Protected by the i_pages lock */
 	unsigned long		nrpages;	/* number of total pages */
 	/* number of shadow or DAX exceptional entries */
 	unsigned long		nrexceptional;
@@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  *
  * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
  *			synchronize competing switching instances and to tell
- *			wb stat updates to grab mapping->tree_lock.  See
+ *			wb stat updates to grab the i_pages lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f13bc25..1ac1f06 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
  * refcount. Each user mapping also has a reference to the page.
  *
  * The pagecache pages are stored in a per-mapping radix tree, which is
- * rooted at mapping->page_tree, and indexed by offset.
+ * rooted at mapping->i_pages, and indexed by offset.
  * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
  * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 34ce3eb..b1bd218 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr);
  * 3. check the page is still in pagecache (if no, goto 1)
  *
  * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with tree_lock held for write):
+ * following (with the i_pages lock held):
  * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
  * B. remove page from pagecache
  * C. free the page
@@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr);
  *
  * It is possible that between 1 and 2, the page is removed then the exact same
  * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using tree_lock could equally have run before or after
+ * old find_get_page using a lock could equally have run before or after
  * such a re-insertion, depending on order that locks are granted.
  *
  * Lookups racing against pagecache insertion isn't a big problem: either 1