mm, proc: reduce cost of /proc/pid/smaps for unpopulated shmem mappings

Following the previous patch, further reduction of /proc/pid/smaps cost
is possible for private writable shmem mappings with unpopulated areas
where the page walk invokes the .pte_hole function.  We can use radix
tree iterator for each such area instead of calling find_get_entry() in
a loop.  This is possible at the extra maintenance cost of introducing
another shmem function shmem_partial_swap_usage().

To demonstrate the diference, I have measured this on a process that
creates a private writable 2GB mapping of a partially swapped out
/dev/shm/file (which cannot employ the optimizations from the prvious
patch) and doesn't populate it at all.  I time how long does it take to
cat /proc/pid/smaps of this process 100 times.

Before this patch:

real    0m3.831s
user    0m0.180s
sys     0m3.212s

After this patch:

real    0m1.176s
user    0m0.180s
sys     0m0.684s

The time is similar to the case where a radix tree iterator is employed
on the whole mapping.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/shmem.c b/mm/shmem.c
index e978621..760d90c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -361,41 +361,18 @@
 
 /*
  * Determine (in bytes) how many of the shmem object's pages mapped by the
- * given vma is swapped out.
+ * given offsets are swapped out.
  *
  * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
-unsigned long shmem_swap_usage(struct vm_area_struct *vma)
+unsigned long shmem_partial_swap_usage(struct address_space *mapping,
+						pgoff_t start, pgoff_t end)
 {
-	struct inode *inode = file_inode(vma->vm_file);
-	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long swapped;
-	pgoff_t start, end;
 	struct radix_tree_iter iter;
 	void **slot;
 	struct page *page;
-
-	/* Be careful as we don't hold info->lock */
-	swapped = READ_ONCE(info->swapped);
-
-	/*
-	 * The easier cases are when the shmem object has nothing in swap, or
-	 * the vma maps it whole. Then we can simply use the stats that we
-	 * already track.
-	 */
-	if (!swapped)
-		return 0;
-
-	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
-		return swapped << PAGE_SHIFT;
-
-	swapped = 0;
-
-	/* Here comes the more involved part */
-	start = linear_page_index(vma, vma->vm_start);
-	end = linear_page_index(vma, vma->vm_end);
+	unsigned long swapped = 0;
 
 	rcu_read_lock();
 
@@ -430,6 +407,40 @@
 }
 
 /*
+ * Determine (in bytes) how many of the shmem object's pages mapped by the
+ * given vma is swapped out.
+ *
+ * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * as long as the inode doesn't go away and racy results are not a problem.
+ */
+unsigned long shmem_swap_usage(struct vm_area_struct *vma)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long swapped;
+
+	/* Be careful as we don't hold info->lock */
+	swapped = READ_ONCE(info->swapped);
+
+	/*
+	 * The easier cases are when the shmem object has nothing in swap, or
+	 * the vma maps it whole. Then we can simply use the stats that we
+	 * already track.
+	 */
+	if (!swapped)
+		return 0;
+
+	if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
+		return swapped << PAGE_SHIFT;
+
+	/* Here comes the more involved part */
+	return shmem_partial_swap_usage(mapping,
+			linear_page_index(vma, vma->vm_start),
+			linear_page_index(vma, vma->vm_end));
+}
+
+/*
  * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
  */
 void shmem_unlock_mapping(struct address_space *mapping)