[MIPS] Improve branch prediction in ll/sc atomic operations.

Now that finally all supported versions of binutils have functioning
support for .subsection use .subsection to tweak the branch prediction

I did not modify the R10000 errata variants because it seems unclear if
this will invalidate the workaround which actually relies on the cheesy
prediction of branch likely to cause a misspredict if the sc was
successful.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 06445de..06c0822 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -68,7 +68,10 @@
 		"1:	" __LL "%0, %1			# set_bit	\n"
 		"	or	%0, %2					\n"
 		"	" __SC	"%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -116,7 +119,10 @@
 		"1:	" __LL "%0, %1			# clear_bit	\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 1b					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
 		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (~(1UL << (nr & SZLONG_MASK))), "m" (*m));
@@ -166,7 +172,10 @@
 		"1:	" __LL "%0, %1		# change_bit	\n"
 		"	xor	%0, %2				\n"
 		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 1b				\n"
+		"	beqz	%0, 2f				\n"
+		"	.subsection 2				\n"
+		"2:	b	1b				\n"
+		"	.previous				\n"
 		"	.set	mips0				\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << (nr & SZLONG_MASK)), "m" (*m));
@@ -222,8 +231,12 @@
 		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -290,8 +303,12 @@
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
 		"	" __SC 	"%2, %1					\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)
@@ -356,8 +373,12 @@
 		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
 		"	xor	%2, %0, %3				\n"
 		"	" __SC	"\t%2, %1				\n"
-		"	beqz	%2, 1b					\n"
+		"	beqz	%2, 2f					\n"
 		"	 and	%2, %0, %3				\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	 nop						\n"
+		"	.previous					\n"
 		"	.set	pop					\n"
 		: "=&r" (temp), "=m" (*m), "=&r" (res)
 		: "r" (1UL << (nr & SZLONG_MASK)), "m" (*m)