rcu: Move __rcu_read_unlock()'s barrier() within if-statement

We only need to constrain the compiler if we are actually exiting
the top-level RCU read-side critical section.  This commit therefore
moves the first barrier() call in __rcu_read_unlock() to inside the
"if" statement, thus avoiding needless register flushes for inner
rcu_read_unlock() calls.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index af186e2..2cf4226 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -134,16 +134,6 @@
 
 extern void synchronize_sched(void);
 
-static inline void __rcu_read_lock_bh(void)
-{
-	local_bh_disable();
-}
-
-static inline void __rcu_read_unlock_bh(void)
-{
-	local_bh_enable();
-}
-
 #ifdef CONFIG_PREEMPT_RCU
 
 extern void __rcu_read_lock(void);
@@ -686,7 +676,7 @@
  */
 static inline void rcu_read_lock_bh(void)
 {
-	__rcu_read_lock_bh();
+	local_bh_disable();
 	__acquire(RCU_BH);
 	rcu_read_acquire_bh();
 }
@@ -700,7 +690,7 @@
 {
 	rcu_read_release_bh();
 	__release(RCU_BH);
-	__rcu_read_unlock_bh();
+	local_bh_enable();
 }
 
 /**