radix-tree: improve multiorder iterators

This fixes several interlinked problems with the iterators in the
presence of multiorder entries.

1. radix_tree_iter_next() would only advance by one slot, which would
   result in the iterators returning the same entry more than once if
   there were sibling entries.

2. radix_tree_next_slot() could return an internal pointer instead of
   a user pointer if a tagged multiorder entry was immediately followed by
   an entry of lower order.

3. radix_tree_next_slot() expanded to a lot more code than it used to
   when multiorder support was compiled in.  And I wasn't comfortable with
   entry_to_node() being in a header file.

Fixing radix_tree_iter_next() for the presence of sibling entries
necessarily involves examining the contents of the radix tree, so we now
need to pass 'slot' to radix_tree_iter_next(), and we need to change the
calling convention so it is called *before* dropping the lock which
protects the tree.  Also rename it to radix_tree_iter_resume(), as some
people thought it was necessary to call radix_tree_iter_next() each time
around the loop.

radix_tree_next_slot() becomes closer to how it looked before multiorder
support was introduced.  It only checks to see if the next entry in the
chunk is a sibling entry or a pointer to a node; this should be rare
enough that handling this case out of line is not a performance impact
(and such impact is amortised by the fact that the entry we just
processed was a multiorder entry).  Also, radix_tree_next_slot() used to
force a new chunk lookup for untagged entries, which is more expensive
than the out of line sibling entry skipping.

Link: http://lkml.kernel.org/r/1480369871-5271-55-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index df71cb8..f328a66 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -48,8 +48,8 @@ static void *add_entries_fn(void *arg)
 /*
  * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
  * things that have been removed and randomly resetting our iteration to the
- * next chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
- * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * next chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
  * NULL 'slot' variable.
  */
 static void *tagged_iteration_fn(void *arg)
@@ -79,7 +79,7 @@ static void *tagged_iteration_fn(void *arg)
 			}
 
 			if (rand_r(&seeds[0]) % 50 == 0) {
-				slot = radix_tree_iter_next(&iter);
+				slot = radix_tree_iter_resume(slot, &iter);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
@@ -96,8 +96,8 @@ static void *tagged_iteration_fn(void *arg)
 /*
  * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
  * that have been removed and randomly resetting our iteration to the next
- * chunk with radix_tree_iter_next().  Both radix_tree_iter_retry() and
- * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
  * NULL 'slot' variable.
  */
 static void *untagged_iteration_fn(void *arg)
@@ -127,7 +127,7 @@ static void *untagged_iteration_fn(void *arg)
 			}
 
 			if (rand_r(&seeds[1]) % 50 == 0) {
-				slot = radix_tree_iter_next(&iter);
+				slot = radix_tree_iter_resume(slot, &iter);
 				rcu_read_unlock();
 				rcu_barrier();
 				rcu_read_lock();
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 8d5865c..b9be885 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -231,11 +231,14 @@ void multiorder_iteration(void)
 		radix_tree_for_each_slot(slot, &tree, &iter, j) {
 			int height = order[i] / RADIX_TREE_MAP_SHIFT;
 			int shift = height * RADIX_TREE_MAP_SHIFT;
-			int mask = (1 << order[i]) - 1;
+			unsigned long mask = (1UL << order[i]) - 1;
+			struct item *item = *slot;
 
-			assert(iter.index >= (index[i] &~ mask));
-			assert(iter.index <= (index[i] | mask));
+			assert((iter.index | mask) == (index[i] | mask));
 			assert(iter.shift == shift);
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (index[i] | mask));
+			assert(item->order == order[i]);
 			i++;
 		}
 	}
@@ -269,7 +272,7 @@ void multiorder_tagged_iteration(void)
 		assert(radix_tree_tag_set(&tree, tag_index[i], 1));
 
 	for (j = 0; j < 256; j++) {
-		int mask, k;
+		int k;
 
 		for (i = 0; i < TAG_ENTRIES; i++) {
 			for (k = i; index[k] < tag_index[i]; k++)
@@ -279,12 +282,16 @@ void multiorder_tagged_iteration(void)
 		}
 
 		radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+			unsigned long mask;
+			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
-			mask = (1 << order[k]) - 1;
+			mask = (1UL << order[k]) - 1;
 
-			assert(iter.index >= (tag_index[i] &~ mask));
-			assert(iter.index <= (tag_index[i] | mask));
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
 			i++;
 		}
 	}
@@ -303,12 +310,15 @@ void multiorder_tagged_iteration(void)
 		}
 
 		radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
+			struct item *item = *slot;
 			for (k = i; index[k] < tag_index[i]; k++)
 				;
 			mask = (1 << order[k]) - 1;
 
-			assert(iter.index >= (tag_index[i] &~ mask));
-			assert(iter.index <= (tag_index[i] | mask));
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
 			i++;
 		}
 	}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index 1f06ed7..b594841 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -5,7 +5,7 @@
  * In following radix_tree_next_slot current chunk size becomes zero.
  * This isn't checked and it tries to dereference null pointer in slot.
  *
- * Helper radix_tree_iter_next reset slot to NULL and next_index to index + 1,
+ * Helper radix_tree_iter_resume reset slot to NULL and next_index to index + 1,
  * for tagger iteraction it also must reset cached tags in iterator to abort
  * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk.
  *
@@ -88,7 +88,7 @@ void regression3_test(void)
 		printf("slot %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
@@ -96,7 +96,7 @@ void regression3_test(void)
 		printf("contig %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
@@ -106,7 +106,7 @@ void regression3_test(void)
 		printf("tagged %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
 			printf("next at %ld\n", iter.index);
-			slot = radix_tree_iter_next(&iter);
+			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 423c528..617416e 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -41,6 +41,7 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
 extern int nr_allocated;
 
 /* Normally private parts of lib/radix-tree.c */
+struct radix_tree_node *entry_to_node(void *ptr);
 void radix_tree_dump(struct radix_tree_root *root);
 int root_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned long node_maxindex(struct radix_tree_node *);