Merge branches 'doc.2015.07.15a' and 'torture.2015.07.15a' into HEAD

doc.2015.07.15a: Documentation updates.
torture.2015.07.15a: Torture-test updates.
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index 1e6c0da..c0bf244 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -28,7 +28,7 @@
 o	Avoid cancellation when using the "+" and "-" infix arithmetic
 	operators.  For example, for a given variable "x", avoid
 	"(x-x)".  There are similar arithmetic pitfalls from other
-	arithmetic operatiors, such as "(x*0)", "(x/(x+1))" or "(x%1)".
+	arithmetic operators, such as "(x*0)", "(x/(x+1))" or "(x%1)".
 	The compiler is within its rights to substitute zero for all of
 	these expressions, so that subsequent accesses no longer depend
 	on the rcu_dereference(), again possibly resulting in bugs due
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1d6f045..01b5b68 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3135,22 +3135,35 @@
 			in a given burst of a callback-flood test.
 
 	rcutorture.fqs_duration= [KNL]
-			Set duration of force_quiescent_state bursts.
+			Set duration of force_quiescent_state bursts
+			in microseconds.
 
 	rcutorture.fqs_holdoff= [KNL]
-			Set holdoff time within force_quiescent_state bursts.
+			Set holdoff time within force_quiescent_state bursts
+			in microseconds.
 
 	rcutorture.fqs_stutter= [KNL]
-			Set wait time between force_quiescent_state bursts.
+			Set wait time between force_quiescent_state bursts
+			in seconds.
+
+	rcutorture.gp_cond= [KNL]
+			Use conditional/asynchronous update-side
+			primitives, if available.
 
 	rcutorture.gp_exp= [KNL]
-			Use expedited update-side primitives.
+			Use expedited update-side primitives, if available.
 
 	rcutorture.gp_normal= [KNL]
-			Use normal (non-expedited) update-side primitives.
-			If both gp_exp and gp_normal are set, do both.
-			If neither gp_exp nor gp_normal are set, still
-			do both.
+			Use normal (non-expedited) asynchronous
+			update-side primitives, if available.
+
+	rcutorture.gp_sync= [KNL]
+			Use normal (non-expedited) synchronous
+			update-side primitives, if available.  If all
+			of rcutorture.gp_cond=, rcutorture.gp_exp=,
+			rcutorture.gp_normal=, and rcutorture.gp_sync=
+			are zero, rcutorture acts as if they are all
+			non-zero.
 
 	rcutorture.n_barrier_cbs= [KNL]
 			Set callbacks/threads for rcu_barrier() testing.
@@ -3177,9 +3190,6 @@
 			Set time (s) between CPU-hotplug operations, or
 			zero to disable CPU-hotplug testing.
 
-	rcutorture.torture_runnable= [BOOT]
-			Start rcutorture running at boot time.
-
 	rcutorture.shuffle_interval= [KNL]
 			Set task-shuffle interval (s).  Shuffling tasks
 			allows some CPUs to go into dyntick-idle mode
@@ -3220,6 +3230,9 @@
 			Test RCU's dyntick-idle handling.  See also the
 			rcutorture.shuffle_interval parameter.
 
+	rcutorture.torture_runnable= [BOOT]
+			Start rcutorture running at boot time.
+
 	rcutorture.torture_type= [KNL]
 			Specify the RCU implementation to test.
 
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 13feb69..3185238 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -194,22 +194,22 @@
  (*) On any given CPU, dependent memory accesses will be issued in order, with
      respect to itself.  This means that for:
 
-	ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
+	WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
 
      the CPU will issue the following memory operations:
 
 	Q = LOAD P, D = LOAD *Q
 
      and always in that order.  On most systems, smp_read_barrier_depends()
-     does nothing, but it is required for DEC Alpha.  The ACCESS_ONCE()
-     is required to prevent compiler mischief.  Please note that you
-     should normally use something like rcu_dereference() instead of
-     open-coding smp_read_barrier_depends().
+     does nothing, but it is required for DEC Alpha.  The READ_ONCE()
+     and WRITE_ONCE() are required to prevent compiler mischief.  Please
+     note that you should normally use something like rcu_dereference()
+     instead of open-coding smp_read_barrier_depends().
 
  (*) Overlapping loads and stores within a particular CPU will appear to be
      ordered within that CPU.  This means that for:
 
-	a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
+	a = READ_ONCE(*X); WRITE_ONCE(*X, b);
 
      the CPU will only issue the following sequence of memory operations:
 
@@ -217,7 +217,7 @@
 
      And for:
 
-	ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
+	WRITE_ONCE(*X, c); d = READ_ONCE(*X);
 
      the CPU will only issue:
 
@@ -228,11 +228,11 @@
 
 And there are a number of things that _must_ or _must_not_ be assumed:
 
- (*) It _must_not_ be assumed that the compiler will do what you want with
-     memory references that are not protected by ACCESS_ONCE().  Without
-     ACCESS_ONCE(), the compiler is within its rights to do all sorts
-     of "creative" transformations, which are covered in the Compiler
-     Barrier section.
+ (*) It _must_not_ be assumed that the compiler will do what you want
+     with memory references that are not protected by READ_ONCE() and
+     WRITE_ONCE().  Without them, the compiler is within its rights to
+     do all sorts of "creative" transformations, which are covered in
+     the Compiler Barrier section.
 
  (*) It _must_not_ be assumed that independent loads and stores will be issued
      in the order given.  This means that for:
@@ -520,8 +520,8 @@
 	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = &B
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
 			      D = *Q;
 
 There's a clear data dependency here, and it would seem that by the end of the
@@ -547,8 +547,8 @@
 	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = &B
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
 			      <data dependency barrier>
 			      D = *Q;
 
@@ -574,8 +574,8 @@
 	{ M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
 	M[1] = 4;
 	<write barrier>
-	ACCESS_ONCE(P) = 1
-			      Q = ACCESS_ONCE(P);
+	WRITE_ONCE(P, 1);
+			      Q = READ_ONCE(P);
 			      <data dependency barrier>
 			      D = M[Q];
 
@@ -596,10 +596,10 @@
 simply a data dependency barrier to make it work correctly.  Consider the
 following bit of code:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		<data dependency barrier>  /* BUG: No data dependency!!! */
-		p = ACCESS_ONCE(b);
+		p = READ_ONCE(b);
 	}
 
 This will not have the desired effect because there is no actual data
@@ -608,10 +608,10 @@
 the load from b as having happened before the load from a.  In such a
 case what's actually required is:
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		<read barrier>
-		p = ACCESS_ONCE(b);
+		p = READ_ONCE(b);
 	}
 
 However, stores are not speculated.  This means that ordering -is- provided
@@ -619,7 +619,7 @@
 
 	q = READ_ONCE_CTRL(a);
 	if (q) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 	}
 
 Control dependencies pair normally with other types of barriers.  That
@@ -647,11 +647,11 @@
 	q = READ_ONCE_CTRL(a);
 	if (q) {
 		barrier();
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
 		barrier();
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something_else();
 	}
 
@@ -660,12 +660,12 @@
 
 	q = READ_ONCE_CTRL(a);
 	barrier();
-	ACCESS_ONCE(b) = p;  /* BUG: No ordering vs. load from a!!! */
+	WRITE_ONCE(b, p);  /* BUG: No ordering vs. load from a!!! */
 	if (q) {
-		/* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
 		do_something();
 	} else {
-		/* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
 		do_something_else();
 	}
 
@@ -676,7 +676,7 @@
 Therefore, if you need ordering in this example, you need explicit
 memory barriers, for example, smp_store_release():
 
-	q = ACCESS_ONCE(a);
+	q = READ_ONCE(a);
 	if (q) {
 		smp_store_release(&b, p);
 		do_something();
@@ -690,10 +690,10 @@
 
 	q = READ_ONCE_CTRL(a);
 	if (q) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -706,10 +706,10 @@
 
 	q = READ_ONCE_CTRL(a);
 	if (q % MAX) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -718,7 +718,7 @@
 transform the above code into the following:
 
 	q = READ_ONCE_CTRL(a);
-	ACCESS_ONCE(b) = p;
+	WRITE_ONCE(b, p);
 	do_something_else();
 
 Given this transformation, the CPU is not required to respect the ordering
@@ -731,10 +731,10 @@
 	q = READ_ONCE_CTRL(a);
 	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
 	if (q % MAX) {
-		ACCESS_ONCE(b) = p;
+		WRITE_ONCE(b, p);
 		do_something();
 	} else {
-		ACCESS_ONCE(b) = r;
+		WRITE_ONCE(b, r);
 		do_something_else();
 	}
 
@@ -746,18 +746,18 @@
 evaluation.  Consider this example:
 
 	q = READ_ONCE_CTRL(a);
-	if (a || 1 > 0)
-		ACCESS_ONCE(b) = 1;
+	if (q || 1 > 0)
+		WRITE_ONCE(b, 1);
 
 Because the first condition cannot fault and the second condition is
 always true, the compiler can transform this example as following,
 defeating control dependency:
 
 	q = READ_ONCE_CTRL(a);
-	ACCESS_ONCE(b) = 1;
+	WRITE_ONCE(b, 1);
 
 This example underscores the need to ensure that the compiler cannot
-out-guess your code.  More generally, although ACCESS_ONCE() does force
+out-guess your code.  More generally, although READ_ONCE() does force
 the compiler to actually emit code for a given load, it does not force
 the compiler to use the results.
 
@@ -769,7 +769,7 @@
 	=======================   =======================
 	r1 = READ_ONCE_CTRL(x);   r2 = READ_ONCE_CTRL(y);
 	if (r1 > 0)               if (r2 > 0)
-	  ACCESS_ONCE(y) = 1;       ACCESS_ONCE(x) = 1;
+	  WRITE_ONCE(y, 1);         WRITE_ONCE(x, 1);
 
 	assert(!(r1 == 1 && r2 == 1));
 
@@ -779,7 +779,7 @@
 
 	CPU 2
 	=====================
-	ACCESS_ONCE(x) = 2;
+	WRITE_ONCE(x, 2);
 
 	assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
 
@@ -798,8 +798,7 @@
 
   (*) Control dependencies must be headed by READ_ONCE_CTRL().
       Or, as a much less preferable alternative, interpose
-      be headed by READ_ONCE() or an ACCESS_ONCE() read and must
-      have smp_read_barrier_depends() between this read and the
+      smp_read_barrier_depends() between a READ_ONCE() and the
       control-dependent write.
 
   (*) Control dependencies can order prior loads against later stores.
@@ -815,15 +814,16 @@
 
   (*) Control dependencies require at least one run-time conditional
       between the prior load and the subsequent store, and this
-      conditional must involve the prior load.  If the compiler
-      is able to optimize the conditional away, it will have also
-      optimized away the ordering.  Careful use of ACCESS_ONCE() can
-      help to preserve the needed conditional.
+      conditional must involve the prior load.  If the compiler is able
+      to optimize the conditional away, it will have also optimized
+      away the ordering.  Careful use of READ_ONCE_CTRL(), READ_ONCE(),
+      and WRITE_ONCE() can help to preserve the needed conditional.
 
   (*) Control dependencies require that the compiler avoid reordering the
-      dependency into nonexistence.  Careful use of ACCESS_ONCE() or
-      barrier() can help to preserve your control dependency.  Please
-      see the Compiler Barrier section for more information.
+      dependency into nonexistence.  Careful use of READ_ONCE_CTRL()
+      or smp_read_barrier_depends() can help to preserve your control
+      dependency.  Please see the Compiler Barrier section for more
+      information.
 
   (*) Control dependencies pair normally with other types of barriers.
 
@@ -848,11 +848,11 @@
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	ACCESS_ONCE(a) = 1;
+	WRITE_ONCE(a, 1);
 	<write barrier>
-	ACCESS_ONCE(b) = 2;   x = ACCESS_ONCE(b);
+	WRITE_ONCE(b, 2);     x = READ_ONCE(b);
 			      <read barrier>
-			      y = ACCESS_ONCE(a);
+			      y = READ_ONCE(a);
 
 Or:
 
@@ -860,7 +860,7 @@
 	===============	      ===============================
 	a = 1;
 	<write barrier>
-	ACCESS_ONCE(b) = &a;  x = ACCESS_ONCE(b);
+	WRITE_ONCE(b, &a);    x = READ_ONCE(b);
 			      <data dependency barrier>
 			      y = *x;
 
@@ -868,11 +868,11 @@
 
 	CPU 1		      CPU 2
 	===============	      ===============================
-	r1 = ACCESS_ONCE(y);
+	r1 = READ_ONCE(y);
 	<general barrier>
-	ACCESS_ONCE(y) = 1;   if (r2 = ACCESS_ONCE(x)) {
+	WRITE_ONCE(y, 1);     if (r2 = READ_ONCE(x)) {
 			         <implicit control dependency>
-			         ACCESS_ONCE(y) = 1;
+			         WRITE_ONCE(y, 1);
 			      }
 
 	assert(r1 == 0 || r2 == 0);
@@ -886,11 +886,11 @@
 
 	CPU 1                               CPU 2
 	===================                 ===================
-	ACCESS_ONCE(a) = 1;  }----   --->{  v = ACCESS_ONCE(c);
-	ACCESS_ONCE(b) = 2;  }    \ /    {  w = ACCESS_ONCE(d);
+	WRITE_ONCE(a, 1);    }----   --->{  v = READ_ONCE(c);
+	WRITE_ONCE(b, 2);    }    \ /    {  w = READ_ONCE(d);
 	<write barrier>            \        <read barrier>
-	ACCESS_ONCE(c) = 3;  }    / \    {  x = ACCESS_ONCE(a);
-	ACCESS_ONCE(d) = 4;  }----   --->{  y = ACCESS_ONCE(b);
+	WRITE_ONCE(c, 3);    }    / \    {  x = READ_ONCE(a);
+	WRITE_ONCE(d, 4);    }----   --->{  y = READ_ONCE(b);
 
 
 EXAMPLES OF MEMORY BARRIER SEQUENCES
@@ -1340,10 +1340,10 @@
 
 	barrier();
 
-This is a general barrier -- there are no read-read or write-write variants
-of barrier().  However, ACCESS_ONCE() can be thought of as a weak form
-for barrier() that affects only the specific accesses flagged by the
-ACCESS_ONCE().
+This is a general barrier -- there are no read-read or write-write
+variants of barrier().  However, READ_ONCE() and WRITE_ONCE() can be
+thought of as weak forms of barrier() that affect only the specific
+accesses flagged by the READ_ONCE() or WRITE_ONCE().
 
 The barrier() function has the following effects:
 
@@ -1355,9 +1355,10 @@
  (*) Within a loop, forces the compiler to load the variables used
      in that loop's conditional on each pass through that loop.
 
-The ACCESS_ONCE() function can prevent any number of optimizations that,
-while perfectly safe in single-threaded code, can be fatal in concurrent
-code.  Here are some examples of these sorts of optimizations:
+The READ_ONCE() and WRITE_ONCE() functions can prevent any number of
+optimizations that, while perfectly safe in single-threaded code, can
+be fatal in concurrent code.  Here are some examples of these sorts
+of optimizations:
 
  (*) The compiler is within its rights to reorder loads and stores
      to the same variable, and in some cases, the CPU is within its
@@ -1370,11 +1371,11 @@
      Might result in an older value of x stored in a[1] than in a[0].
      Prevent both the compiler and the CPU from doing this as follows:
 
-	a[0] = ACCESS_ONCE(x);
-	a[1] = ACCESS_ONCE(x);
+	a[0] = READ_ONCE(x);
+	a[1] = READ_ONCE(x);
 
-     In short, ACCESS_ONCE() provides cache coherence for accesses from
-     multiple CPUs to a single variable.
+     In short, READ_ONCE() and WRITE_ONCE() provide cache coherence for
+     accesses from multiple CPUs to a single variable.
 
  (*) The compiler is within its rights to merge successive loads from
      the same variable.  Such merging can cause the compiler to "optimize"
@@ -1391,9 +1392,9 @@
 		for (;;)
 			do_something_with(tmp);
 
-     Use ACCESS_ONCE() to prevent the compiler from doing this to you:
+     Use READ_ONCE() to prevent the compiler from doing this to you:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
  (*) The compiler is within its rights to reload a variable, for example,
@@ -1415,9 +1416,9 @@
      a was modified by some other CPU between the "while" statement and
      the call to do_something_with().
 
-     Again, use ACCESS_ONCE() to prevent the compiler from doing this:
+     Again, use READ_ONCE() to prevent the compiler from doing this:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
      Note that if the compiler runs short of registers, it might save
@@ -1437,21 +1438,21 @@
 
 	do { } while (0);
 
-     This transformation is a win for single-threaded code because it gets
-     rid of a load and a branch.  The problem is that the compiler will
-     carry out its proof assuming that the current CPU is the only one
-     updating variable 'a'.  If variable 'a' is shared, then the compiler's
-     proof will be erroneous.  Use ACCESS_ONCE() to tell the compiler
-     that it doesn't know as much as it thinks it does:
+     This transformation is a win for single-threaded code because it
+     gets rid of a load and a branch.  The problem is that the compiler
+     will carry out its proof assuming that the current CPU is the only
+     one updating variable 'a'.  If variable 'a' is shared, then the
+     compiler's proof will be erroneous.  Use READ_ONCE() to tell the
+     compiler that it doesn't know as much as it thinks it does:
 
-	while (tmp = ACCESS_ONCE(a))
+	while (tmp = READ_ONCE(a))
 		do_something_with(tmp);
 
      But please note that the compiler is also closely watching what you
-     do with the value after the ACCESS_ONCE().  For example, suppose you
+     do with the value after the READ_ONCE().  For example, suppose you
      do the following and MAX is a preprocessor macro with the value 1:
 
-	while ((tmp = ACCESS_ONCE(a)) % MAX)
+	while ((tmp = READ_ONCE(a)) % MAX)
 		do_something_with(tmp);
 
      Then the compiler knows that the result of the "%" operator applied
@@ -1475,12 +1476,12 @@
      surprise if some other CPU might have stored to variable 'a' in the
      meantime.
 
-     Use ACCESS_ONCE() to prevent the compiler from making this sort of
+     Use WRITE_ONCE() to prevent the compiler from making this sort of
      wrong guess:
 
-	ACCESS_ONCE(a) = 0;
+	WRITE_ONCE(a, 0);
 	/* Code that does not store to variable a. */
-	ACCESS_ONCE(a) = 0;
+	WRITE_ONCE(a, 0);
 
  (*) The compiler is within its rights to reorder memory accesses unless
      you tell it not to.  For example, consider the following interaction
@@ -1509,40 +1510,43 @@
 	}
 
      If the interrupt occurs between these two statement, then
-     interrupt_handler() might be passed a garbled msg.  Use ACCESS_ONCE()
+     interrupt_handler() might be passed a garbled msg.  Use WRITE_ONCE()
      to prevent this as follows:
 
 	void process_level(void)
 	{
-		ACCESS_ONCE(msg) = get_message();
-		ACCESS_ONCE(flag) = true;
+		WRITE_ONCE(msg, get_message());
+		WRITE_ONCE(flag, true);
 	}
 
 	void interrupt_handler(void)
 	{
-		if (ACCESS_ONCE(flag))
-			process_message(ACCESS_ONCE(msg));
+		if (READ_ONCE(flag))
+			process_message(READ_ONCE(msg));
 	}
 
-     Note that the ACCESS_ONCE() wrappers in interrupt_handler()
-     are needed if this interrupt handler can itself be interrupted
-     by something that also accesses 'flag' and 'msg', for example,
-     a nested interrupt or an NMI.  Otherwise, ACCESS_ONCE() is not
-     needed in interrupt_handler() other than for documentation purposes.
-     (Note also that nested interrupts do not typically occur in modern
-     Linux kernels, in fact, if an interrupt handler returns with
-     interrupts enabled, you will get a WARN_ONCE() splat.)
+     Note that the READ_ONCE() and WRITE_ONCE() wrappers in
+     interrupt_handler() are needed if this interrupt handler can itself
+     be interrupted by something that also accesses 'flag' and 'msg',
+     for example, a nested interrupt or an NMI.  Otherwise, READ_ONCE()
+     and WRITE_ONCE() are not needed in interrupt_handler() other than
+     for documentation purposes.  (Note also that nested interrupts
+     do not typically occur in modern Linux kernels, in fact, if an
+     interrupt handler returns with interrupts enabled, you will get a
+     WARN_ONCE() splat.)
 
-     You should assume that the compiler can move ACCESS_ONCE() past
-     code not containing ACCESS_ONCE(), barrier(), or similar primitives.
+     You should assume that the compiler can move READ_ONCE() and
+     WRITE_ONCE() past code not containing READ_ONCE(), WRITE_ONCE(),
+     barrier(), or similar primitives.
 
-     This effect could also be achieved using barrier(), but ACCESS_ONCE()
-     is more selective:  With ACCESS_ONCE(), the compiler need only forget
-     the contents of the indicated memory locations, while with barrier()
-     the compiler must discard the value of all memory locations that
-     it has currented cached in any machine registers.  Of course,
-     the compiler must also respect the order in which the ACCESS_ONCE()s
-     occur, though the CPU of course need not do so.
+     This effect could also be achieved using barrier(), but READ_ONCE()
+     and WRITE_ONCE() are more selective:  With READ_ONCE() and
+     WRITE_ONCE(), the compiler need only forget the contents of the
+     indicated memory locations, while with barrier() the compiler must
+     discard the value of all memory locations that it has currently
+     cached in any machine registers.  Of course, the compiler must also
+     respect the order in which the READ_ONCE()s and WRITE_ONCE()s occur,
+     though the CPU of course need not do so.
 
  (*) The compiler is within its rights to invent stores to a variable,
      as in the following example:
@@ -1562,16 +1566,16 @@
      a branch.  Unfortunately, in concurrent code, this optimization
      could cause some other CPU to see a spurious value of 42 -- even
      if variable 'a' was never zero -- when loading variable 'b'.
-     Use ACCESS_ONCE() to prevent this as follows:
+     Use WRITE_ONCE() to prevent this as follows:
 
 	if (a)
-		ACCESS_ONCE(b) = a;
+		WRITE_ONCE(b, a);
 	else
-		ACCESS_ONCE(b) = 42;
+		WRITE_ONCE(b, 42);
 
      The compiler can also invent loads.  These are usually less
      damaging, but they can result in cache-line bouncing and thus in
-     poor performance and scalability.  Use ACCESS_ONCE() to prevent
+     poor performance and scalability.  Use READ_ONCE() to prevent
      invented loads.
 
  (*) For aligned memory locations whose size allows them to be accessed
@@ -1590,9 +1594,9 @@
      This optimization can therefore be a win in single-threaded code.
      In fact, a recent bug (since fixed) caused GCC to incorrectly use
      this optimization in a volatile store.  In the absence of such bugs,
-     use of ACCESS_ONCE() prevents store tearing in the following example:
+     use of WRITE_ONCE() prevents store tearing in the following example:
 
-	ACCESS_ONCE(p) = 0x00010002;
+	WRITE_ONCE(p, 0x00010002);
 
      Use of packed structures can also result in load and store tearing,
      as in this example:
@@ -1609,22 +1613,23 @@
 	foo2.b = foo1.b;
 	foo2.c = foo1.c;
 
-     Because there are no ACCESS_ONCE() wrappers and no volatile markings,
-     the compiler would be well within its rights to implement these three
-     assignment statements as a pair of 32-bit loads followed by a pair
-     of 32-bit stores.  This would result in load tearing on 'foo1.b'
-     and store tearing on 'foo2.b'.  ACCESS_ONCE() again prevents tearing
-     in this example:
+     Because there are no READ_ONCE() or WRITE_ONCE() wrappers and no
+     volatile markings, the compiler would be well within its rights to
+     implement these three assignment statements as a pair of 32-bit
+     loads followed by a pair of 32-bit stores.  This would result in
+     load tearing on 'foo1.b' and store tearing on 'foo2.b'.  READ_ONCE()
+     and WRITE_ONCE() again prevent tearing in this example:
 
 	foo2.a = foo1.a;
-	ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b);
+	WRITE_ONCE(foo2.b, READ_ONCE(foo1.b));
 	foo2.c = foo1.c;
 
-All that aside, it is never necessary to use ACCESS_ONCE() on a variable
-that has been marked volatile.  For example, because 'jiffies' is marked
-volatile, it is never necessary to say ACCESS_ONCE(jiffies).  The reason
-for this is that ACCESS_ONCE() is implemented as a volatile cast, which
-has no effect when its argument is already marked volatile.
+All that aside, it is never necessary to use READ_ONCE() and
+WRITE_ONCE() on a variable that has been marked volatile.  For example,
+because 'jiffies' is marked volatile, it is never necessary to
+say READ_ONCE(jiffies).  The reason for this is that READ_ONCE() and
+WRITE_ONCE() are implemented as volatile casts, which have no effect when
+their argument is already marked volatile.
 
 Please note that these compiler barriers have no direct effect on the CPU,
 which may then reorder things however it wishes.
@@ -1646,14 +1651,15 @@
 All memory barriers except the data dependency barriers imply a compiler
 barrier. Data dependencies do not impose any additional compiler ordering.
 
-Aside: In the case of data dependencies, the compiler would be expected to
-issue the loads in the correct order (eg. `a[b]` would have to load the value
-of b before loading a[b]), however there is no guarantee in the C specification
-that the compiler may not speculate the value of b (eg. is equal to 1) and load
-a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the
-problem of a compiler reloading b after having loaded a[b], thus having a newer
-copy of b than a[b]. A consensus has not yet been reached about these problems,
-however the ACCESS_ONCE macro is a good place to start looking.
+Aside: In the case of data dependencies, the compiler would be expected
+to issue the loads in the correct order (eg. `a[b]` would have to load
+the value of b before loading a[b]), however there is no guarantee in
+the C specification that the compiler may not speculate the value of b
+(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
+tmp = a[b]; ). There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+has not yet been reached about these problems, however the READ_ONCE()
+macro is a good place to start looking.
 
 SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
 systems because it is assumed that a CPU will appear to be self-consistent,
@@ -1852,11 +1858,12 @@
 imply a full memory barrier.  If it is necessary for a RELEASE-ACQUIRE
 pair to produce a full barrier, the ACQUIRE can be followed by an
 smp_mb__after_unlock_lock() invocation.  This will produce a full barrier
-if either (a) the RELEASE and the ACQUIRE are executed by the same
-CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable.
-The smp_mb__after_unlock_lock() primitive is free on many architectures.
-Without smp_mb__after_unlock_lock(), the CPU's execution of the critical
-sections corresponding to the RELEASE and the ACQUIRE can cross, so that:
+(including transitivity) if either (a) the RELEASE and the ACQUIRE are
+executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on
+the same variable.  The smp_mb__after_unlock_lock() primitive is free
+on many architectures.  Without smp_mb__after_unlock_lock(), the CPU's
+execution of the critical sections corresponding to the RELEASE and the
+ACQUIRE can cross, so that:
 
 	*A = a;
 	RELEASE M
@@ -2126,12 +2133,12 @@
 
 	CPU 1				CPU 2
 	===============================	===============================
-	ACCESS_ONCE(*A) = a;		ACCESS_ONCE(*E) = e;
+	WRITE_ONCE(*A, a);		WRITE_ONCE(*E, e);
 	ACQUIRE M			ACQUIRE Q
-	ACCESS_ONCE(*B) = b;		ACCESS_ONCE(*F) = f;
-	ACCESS_ONCE(*C) = c;		ACCESS_ONCE(*G) = g;
+	WRITE_ONCE(*B, b);		WRITE_ONCE(*F, f);
+	WRITE_ONCE(*C, c);		WRITE_ONCE(*G, g);
 	RELEASE M			RELEASE Q
-	ACCESS_ONCE(*D) = d;		ACCESS_ONCE(*H) = h;
+	WRITE_ONCE(*D, d);		WRITE_ONCE(*H, h);
 
 Then there is no guarantee as to what order CPU 3 will see the accesses to *A
 through *H occur in, other than the constraints imposed by the separate locks
@@ -2151,18 +2158,18 @@
 
 	CPU 1				CPU 2
 	===============================	===============================
-	ACCESS_ONCE(*A) = a;
+	WRITE_ONCE(*A, a);
 	ACQUIRE M		     [1]
-	ACCESS_ONCE(*B) = b;
-	ACCESS_ONCE(*C) = c;
+	WRITE_ONCE(*B, b);
+	WRITE_ONCE(*C, c);
 	RELEASE M	     [1]
-	ACCESS_ONCE(*D) = d;		ACCESS_ONCE(*E) = e;
+	WRITE_ONCE(*D, d);		WRITE_ONCE(*E, e);
 					ACQUIRE M		     [2]
 					smp_mb__after_unlock_lock();
-					ACCESS_ONCE(*F) = f;
-					ACCESS_ONCE(*G) = g;
+					WRITE_ONCE(*F, f);
+					WRITE_ONCE(*G, g);
 					RELEASE M	     [2]
-					ACCESS_ONCE(*H) = h;
+					WRITE_ONCE(*H, h);
 
 CPU 3 might see:
 
@@ -2881,11 +2888,11 @@
 operations in exactly the order specified, so that if the CPU is, for example,
 given the following piece of code to execute:
 
-	a = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*B) = b;
-	c = ACCESS_ONCE(*C);
-	d = ACCESS_ONCE(*D);
-	ACCESS_ONCE(*E) = e;
+	a = READ_ONCE(*A);
+	WRITE_ONCE(*B, b);
+	c = READ_ONCE(*C);
+	d = READ_ONCE(*D);
+	WRITE_ONCE(*E, e);
 
 they would then expect that the CPU will complete the memory operation for each
 instruction before moving on to the next one, leading to a definite sequence of
@@ -2932,12 +2939,12 @@
 _own_ accesses appear to be correctly ordered, without the need for a memory
 barrier.  For instance with the following code:
 
-	U = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*A) = V;
-	ACCESS_ONCE(*A) = W;
-	X = ACCESS_ONCE(*A);
-	ACCESS_ONCE(*A) = Y;
-	Z = ACCESS_ONCE(*A);
+	U = READ_ONCE(*A);
+	WRITE_ONCE(*A, V);
+	WRITE_ONCE(*A, W);
+	X = READ_ONCE(*A);
+	WRITE_ONCE(*A, Y);
+	Z = READ_ONCE(*A);
 
 and assuming no intervention by an external influence, it can be assumed that
 the final result will appear to be:
@@ -2953,13 +2960,14 @@
 	U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
 
 in that order, but, without intervention, the sequence may have almost any
-combination of elements combined or discarded, provided the program's view of
-the world remains consistent.  Note that ACCESS_ONCE() is -not- optional
-in the above example, as there are architectures where a given CPU might
-reorder successive loads to the same location.  On such architectures,
-ACCESS_ONCE() does whatever is necessary to prevent this, for example, on
-Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the
-special ld.acq and st.rel instructions that prevent such reordering.
+combination of elements combined or discarded, provided the program's view
+of the world remains consistent.  Note that READ_ONCE() and WRITE_ONCE()
+are -not- optional in the above example, as there are architectures
+where a given CPU might reorder successive loads to the same location.
+On such architectures, READ_ONCE() and WRITE_ONCE() do whatever is
+necessary to prevent this, for example, on Itanium the volatile casts
+used by READ_ONCE() and WRITE_ONCE() cause GCC to emit the special ld.acq
+and st.rel instructions (respectively) that prevent such reordering.
 
 The compiler may also combine, discard or defer elements of the sequence before
 the CPU even sees them.
@@ -2973,13 +2981,14 @@
 
 	*A = W;
 
-since, without either a write barrier or an ACCESS_ONCE(), it can be
+since, without either a write barrier or a WRITE_ONCE(), it can be
 assumed that the effect of the storage of V to *A is lost.  Similarly:
 
 	*A = Y;
 	Z = *A;
 
-may, without a memory barrier or an ACCESS_ONCE(), be reduced to:
+may, without a memory barrier or a READ_ONCE() and WRITE_ONCE(), be
+reduced to:
 
 	*A = Y;
 	Z = Y;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 0f2cb55..7719295 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -686,10 +686,20 @@
 
 #define RCUTORTURE_TASKS_OPS &tasks_ops,
 
+static bool __maybe_unused torturing_tasks(void)
+{
+	return cur_ops == &tasks_ops;
+}
+
 #else /* #ifdef CONFIG_TASKS_RCU */
 
 #define RCUTORTURE_TASKS_OPS
 
+static bool torturing_tasks(void)
+{
+	return false;
+}
+
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /*
@@ -825,9 +835,7 @@
 	}
 	if (err) {
 		VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM");
-		while (!torture_must_stop())
-			schedule_timeout_interruptible(HZ);
-		return 0;
+		goto wait_for_stop;
 	}
 	VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started");
 	do {
@@ -846,6 +854,7 @@
 		stutter_wait("rcu_torture_cbflood");
 	} while (!torture_must_stop());
 	vfree(rhp);
+wait_for_stop:
 	torture_kthread_stopping("rcu_torture_cbflood");
 	return 0;
 }
@@ -1090,7 +1099,8 @@
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
-				  srcu_read_lock_held(srcu_ctlp));
+				  srcu_read_lock_held(srcu_ctlp) ||
+				  torturing_tasks());
 	if (p == NULL) {
 		/* Leave because rcu_torture_writer is not yet underway */
 		cur_ops->readunlock(idx);
@@ -1164,7 +1174,8 @@
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
-					  srcu_read_lock_held(srcu_ctlp));
+					  srcu_read_lock_held(srcu_ctlp) ||
+					  torturing_tasks());
 		if (p == NULL) {
 			/* Wait for rcu_torture_writer to get underway */
 			cur_ops->readunlock(idx);
@@ -1509,7 +1520,7 @@
 	int i;
 	int ret;
 
-	if (n_barrier_cbs == 0)
+	if (n_barrier_cbs <= 0)
 		return 0;
 	if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
 		pr_alert("%s" TORTURE_FLAG
@@ -1788,12 +1799,15 @@
 					  writer_task);
 	if (firsterr)
 		goto unwind;
-	fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
-				   GFP_KERNEL);
-	if (fakewriter_tasks == NULL) {
-		VERBOSE_TOROUT_ERRSTRING("out of memory");
-		firsterr = -ENOMEM;
-		goto unwind;
+	if (nfakewriters > 0) {
+		fakewriter_tasks = kzalloc(nfakewriters *
+					   sizeof(fakewriter_tasks[0]),
+					   GFP_KERNEL);
+		if (fakewriter_tasks == NULL) {
+			VERBOSE_TOROUT_ERRSTRING("out of memory");
+			firsterr = -ENOMEM;
+			goto unwind;
+		}
 	}
 	for (i = 0; i < nfakewriters; i++) {
 		firsterr = torture_create_kthread(rcu_torture_fakewriter,
@@ -1820,7 +1834,7 @@
 		if (firsterr)
 			goto unwind;
 	}
-	if (test_no_idle_hz) {
+	if (test_no_idle_hz && shuffle_interval > 0) {
 		firsterr = torture_shuffle_init(shuffle_interval * HZ);
 		if (firsterr)
 			goto unwind;
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index 2cc0e60..bafe94c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -5,6 +5,6 @@
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
 CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=n
-#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_RCU_EXPERT=y