arm64: atomics: fix grossly inconsistent asm constraints for exclusives

Our uses of inline asm constraints for atomic operations are fairly
wild and varied. We basically need to guarantee the following:

  1. Any instructions with barrier implications
     (load-acquire/store-release) have a "memory" clobber

  2. When performing exclusive accesses, the addresing mode is generated
     using the "Q" constraint

  3. Atomic blocks which use the condition flags, have a "cc" clobber

This patch addresses these concerns which, as well as fixing the
semantics of the code, stops GCC complaining about impossible asm
constraints.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index e0e65b0..968b5cb 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -29,39 +29,39 @@
 	switch (size) {
 	case 1:
 		asm volatile("//	__xchg1\n"
-		"1:	ldaxrb	%w0, [%3]\n"
-		"	stlxrb	%w1, %w2, [%3]\n"
+		"1:	ldaxrb	%w0, %2\n"
+		"	stlxrb	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
-			: "=&r" (ret), "=&r" (tmp)
-			: "r" (x), "r" (ptr)
-			: "memory", "cc");
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
+			: "r" (x)
+			: "cc", "memory");
 		break;
 	case 2:
 		asm volatile("//	__xchg2\n"
-		"1:	ldaxrh	%w0, [%3]\n"
-		"	stlxrh	%w1, %w2, [%3]\n"
+		"1:	ldaxrh	%w0, %2\n"
+		"	stlxrh	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
-			: "=&r" (ret), "=&r" (tmp)
-			: "r" (x), "r" (ptr)
-			: "memory", "cc");
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
+			: "r" (x)
+			: "cc", "memory");
 		break;
 	case 4:
 		asm volatile("//	__xchg4\n"
-		"1:	ldaxr	%w0, [%3]\n"
-		"	stlxr	%w1, %w2, [%3]\n"
+		"1:	ldaxr	%w0, %2\n"
+		"	stlxr	%w1, %w3, %2\n"
 		"	cbnz	%w1, 1b\n"
-			: "=&r" (ret), "=&r" (tmp)
-			: "r" (x), "r" (ptr)
-			: "memory", "cc");
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
+			: "r" (x)
+			: "cc", "memory");
 		break;
 	case 8:
 		asm volatile("//	__xchg8\n"
-		"1:	ldaxr	%0, [%3]\n"
-		"	stlxr	%w1, %2, [%3]\n"
+		"1:	ldaxr	%0, %2\n"
+		"	stlxr	%w1, %3, %2\n"
 		"	cbnz	%w1, 1b\n"
-			: "=&r" (ret), "=&r" (tmp)
-			: "r" (x), "r" (ptr)
-			: "memory", "cc");
+			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
+			: "r" (x)
+			: "cc", "memory");
 		break;
 	default:
 		BUILD_BUG();
@@ -82,14 +82,14 @@
 	case 1:
 		do {
 			asm volatile("// __cmpxchg1\n"
-			"	ldxrb	%w1, [%2]\n"
+			"	ldxrb	%w1, %2\n"
 			"	mov	%w0, #0\n"
 			"	cmp	%w1, %w3\n"
 			"	b.ne	1f\n"
-			"	stxrb	%w0, %w4, [%2]\n"
+			"	stxrb	%w0, %w4, %2\n"
 			"1:\n"
-				: "=&r" (res), "=&r" (oldval)
-				: "r" (ptr), "Ir" (old), "r" (new)
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
+				: "Ir" (old), "r" (new)
 				: "cc");
 		} while (res);
 		break;
@@ -97,29 +97,29 @@
 	case 2:
 		do {
 			asm volatile("// __cmpxchg2\n"
-			"	ldxrh	%w1, [%2]\n"
+			"	ldxrh	%w1, %2\n"
 			"	mov	%w0, #0\n"
 			"	cmp	%w1, %w3\n"
 			"	b.ne	1f\n"
-			"	stxrh	%w0, %w4, [%2]\n"
+			"	stxrh	%w0, %w4, %2\n"
 			"1:\n"
-				: "=&r" (res), "=&r" (oldval)
-				: "r" (ptr), "Ir" (old), "r" (new)
-				: "memory", "cc");
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
+				: "Ir" (old), "r" (new)
+				: "cc");
 		} while (res);
 		break;
 
 	case 4:
 		do {
 			asm volatile("// __cmpxchg4\n"
-			"	ldxr	%w1, [%2]\n"
+			"	ldxr	%w1, %2\n"
 			"	mov	%w0, #0\n"
 			"	cmp	%w1, %w3\n"
 			"	b.ne	1f\n"
-			"	stxr	%w0, %w4, [%2]\n"
+			"	stxr	%w0, %w4, %2\n"
 			"1:\n"
-				: "=&r" (res), "=&r" (oldval)
-				: "r" (ptr), "Ir" (old), "r" (new)
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
+				: "Ir" (old), "r" (new)
 				: "cc");
 		} while (res);
 		break;
@@ -127,14 +127,14 @@
 	case 8:
 		do {
 			asm volatile("// __cmpxchg8\n"
-			"	ldxr	%1, [%2]\n"
+			"	ldxr	%1, %2\n"
 			"	mov	%w0, #0\n"
 			"	cmp	%1, %3\n"
 			"	b.ne	1f\n"
-			"	stxr	%w0, %4, [%2]\n"
+			"	stxr	%w0, %4, %2\n"
 			"1:\n"
-				: "=&r" (res), "=&r" (oldval)
-				: "r" (ptr), "Ir" (old), "r" (new)
+				: "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
+				: "Ir" (old), "r" (new)
 				: "cc");
 		} while (res);
 		break;