Merge branch 'lockref' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 lockref enablement from Heiko Carstens:
 "Enabling the new lockless lockref variant on s390 would have been
  trivial until Tony Luck added a cpu_relax() call into the
  CMPXCHG_LOOP(), with commit d472d9d98b46 ("lockref: Relax in cmpxchg
  loop")

  As already mentioned, cpu_relax() is very expensive on s390 since it
  yields the current virtual cpu, so we are talking about several
  thousand cycles.  Considering this, enabling the lockless lockref
  variant would contradict the intention of the new semantics.  Also,
  some quick measurements show performance regressions of 50% and more.

  Simply removing the cpu_relax() call again also does not seem very
  desirable, since Waiman Long reported that for some workloads the call
  improved performance by 5%."

* 'lockref' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390: enable ARCH_USE_CMPXCHG_LOCKREF
  lockref: use arch_mutex_cpu_relax() in CMPXCHG_LOOP()
  mutex: replace CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX with simple ifdef
diff --git a/arch/Kconfig b/arch/Kconfig
index 1feb169..af2cc6e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -286,9 +286,6 @@
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
-config HAVE_ARCH_MUTEX_CPU_RELAX
-	bool
-
 config HAVE_RCU_TABLE_FREE
 	bool
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index dcc6ac2..7143793 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -93,6 +93,7 @@
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
@@ -102,7 +103,6 @@
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
-	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 688271f..458c1f7 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,5 +7,3 @@
  */
 
 #include <asm-generic/mutex-dec.h>
-
-#define arch_mutex_cpu_relax()	barrier()
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0eb3750..ca7821f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -198,6 +198,8 @@
 	barrier();
 }
 
+#define arch_mutex_cpu_relax()  barrier()
+
 static inline void psw_set_key(unsigned int key)
 {
 	asm volatile("spka 0(%0)" : : "d" (key));
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 701fe8c..83e5d216 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -44,6 +44,11 @@
 extern int arch_spin_trylock_retry(arch_spinlock_t *);
 extern void arch_spin_relax(arch_spinlock_t *lock);
 
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.owner_cpu == 0;
+}
+
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
 	int old;
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index ccd4260..bab49da 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -15,8 +15,8 @@
 #include <linux/spinlock_types.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
-
 #include <linux/atomic.h>
+#include <asm/processor.h>
 
 /*
  * Simple, straightforward mutexes with strict semantics:
@@ -175,8 +175,8 @@
 
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
-#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
-#define arch_mutex_cpu_relax()	cpu_relax()
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
 #endif
 
 #endif
diff --git a/lib/lockref.c b/lib/lockref.c
index e294ae4..6f9d434 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -12,6 +12,14 @@
 #endif
 
 /*
+ * Allow architectures to override the default cpu_relax() within CMPXCHG_LOOP.
+ * This is useful for architectures with an expensive cpu_relax().
+ */
+#ifndef arch_mutex_cpu_relax
+# define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
+/*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
@@ -28,7 +36,7 @@
 		if (likely(old.lock_count == prev.lock_count)) {		\
 			SUCCESS;						\
 		}								\
-		cpu_relax();							\
+		arch_mutex_cpu_relax();						\
 	}									\
 } while (0)