Use the new batched user accesses in generic user string handling

This converts the generic user string functions to use the batched user
access functions.

It makes a big difference on Skylake, which is the first x86
microarchitecture to implement SMAP.  The STAC/CLAC instructions are not
very fast, and doing them for each access inside the loop that copies
strings from user space (which is what the pathname handling does for
every pathname the kernel uses, for example) is very inefficient.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 3a5f2b3..2625943 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,7 +45,7 @@
 	src -= align;
 	max += align;
 
-	if (unlikely(__get_user(c,(unsigned long __user *)src)))
+	if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
 		return 0;
 	c |= aligned_byte_mask(align);
 
@@ -61,7 +61,7 @@
 		if (unlikely(max <= sizeof(unsigned long)))
 			break;
 		max -= sizeof(unsigned long);
-		if (unlikely(__get_user(c,(unsigned long __user *)(src+res))))
+		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
 			return 0;
 	}
 	res -= align;
@@ -112,7 +112,12 @@
 	src_addr = (unsigned long)str;
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
-		return do_strnlen_user(str, count, max);
+		long retval;
+
+		user_access_begin();
+		retval = do_strnlen_user(str, count, max);
+		user_access_end();
+		return retval;
 	}
 	return 0;
 }
@@ -141,7 +146,12 @@
 	src_addr = (unsigned long)str;
 	if (likely(src_addr < max_addr)) {
 		unsigned long max = max_addr - src_addr;
-		return do_strnlen_user(str, ~0ul, max);
+		long retval;
+
+		user_access_begin();
+		retval = do_strnlen_user(str, ~0ul, max);
+		user_access_end();
+		return retval;
 	}
 	return 0;
 }