MIPS: Get rid of branches to .subsections.

It was a nice optimization - on paper at least.  In practice it results in
branches that may exceed the maximum legal range for a branch.  We can
fight that problem with -ffunction-sections but -ffunction-sections again
is incompatible with -pg used by the function tracer.

By rewriting the loop around all simple LL/SC blocks to C we reduce the
amount of inline assembler and at the same time allow GCC to often fill
the branch delay slots with something sensible or whatever else clever
optimization it may have up in its sleeve.

With this optimization gone we also no longer need -ffunction-sections,
so drop it.

This optimization was originally introduced in 2.6.21, commit
5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) rsp.
f65e4fa8e0c6022ad58dc88d1b11b12589ed7f9f (kernel.org).

Original fix for the issues which caused me to pull this optimization by
Paul Gortmaker <paul.gortmaker@windriver.com>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index b0ce7ca..50b4ef2 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -73,30 +73,26 @@
 		: "ir" (1UL << bit), "m" (*m));
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		__asm__ __volatile__(
-		"1:	" __LL "%0, %1			# set_bit	\n"
-		"	" __INS "%0, %4, %2, 1				\n"
-		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (bit), "m" (*m), "r" (~0));
+		do {
+			__asm__ __volatile__(
+			"	" __LL "%0, %1		# set_bit	\n"
+			"	" __INS "%0, %3, %2, 1			\n"
+			"	" __SC "%0, %1				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (bit), "r" (~0));
+		} while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 */
 	} else if (kernel_uses_llsc) {
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# set_bit	\n"
-		"	or	%0, %2					\n"
-		"	" __SC	"%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (1UL << bit), "m" (*m));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL "%0, %1		# set_bit	\n"
+			"	or	%0, %2				\n"
+			"	" __SC	"%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (1UL << bit));
+		} while (unlikely(!temp));
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -134,34 +130,30 @@
 		"	" __SC "%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (~(1UL << bit)), "m" (*m));
+		: "=&r" (temp), "+m" (*m)
+		: "ir" (~(1UL << bit)));
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
-		__asm__ __volatile__(
-		"1:	" __LL "%0, %1			# clear_bit	\n"
-		"	" __INS "%0, $0, %2, 1				\n"
-		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (bit), "m" (*m));
+		do {
+			__asm__ __volatile__(
+			"	" __LL "%0, %1		# clear_bit	\n"
+			"	" __INS "%0, $0, %2, 1			\n"
+			"	" __SC "%0, %1				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (bit));
+		} while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 */
 	} else if (kernel_uses_llsc) {
-		__asm__ __volatile__(
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# clear_bit	\n"
-		"	and	%0, %2					\n"
-		"	" __SC "%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (~(1UL << bit)), "m" (*m));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL "%0, %1		# clear_bit	\n"
+			"	and	%0, %2				\n"
+			"	" __SC "%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (~(1UL << bit)));
+		} while (unlikely(!temp));
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -213,24 +205,22 @@
 		"	" __SC	"%0, %1				\n"
 		"	beqzl	%0, 1b				\n"
 		"	.set	mips0				\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (1UL << bit), "m" (*m));
+		: "=&r" (temp), "+m" (*m)
+		: "ir" (1UL << bit));
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"	.set	mips3				\n"
-		"1:	" __LL "%0, %1		# change_bit	\n"
-		"	xor	%0, %2				\n"
-		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 2f				\n"
-		"	.subsection 2				\n"
-		"2:	b	1b				\n"
-		"	.previous				\n"
-		"	.set	mips0				\n"
-		: "=&r" (temp), "=m" (*m)
-		: "ir" (1UL << bit), "m" (*m));
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL "%0, %1		# change_bit	\n"
+			"	xor	%0, %2				\n"
+			"	" __SC	"%0, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m)
+			: "ir" (1UL << bit));
+		} while (unlikely(!temp));
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -272,30 +262,26 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL "%0, %1	# test_and_set_bit	\n"
+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -340,30 +326,26 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	" __SC	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL "%0, %1	# test_and_set_bit	\n"
+			"	or	%2, %0, %3			\n"
+			"	" __SC	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -410,49 +392,43 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (kernel_uses_llsc && __builtin_constant_p(nr)) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
-		"	" __EXT "%2, %0, %3, 1				\n"
-		"	" __INS	"%0, $0, %3, 1				\n"
-		"	" __SC 	"%0, %1					\n"
-		"	beqz	%0, 2f					\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	.previous					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "ir" (bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	" __LL	"%0, %1	# test_and_clear_bit	\n"
+			"	" __EXT "%2, %0, %3, 1			\n"
+			"	" __INS	"%0, $0, %3, 1			\n"
+			"	" __SC 	"%0, %1				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "ir" (bit)
+			: "memory");
+		} while (unlikely(!temp));
 #endif
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
-		"	or	%2, %0, %3				\n"
-		"	xor	%2, %3					\n"
-		"	" __SC 	"%2, %1					\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL	"%0, %1	# test_and_clear_bit	\n"
+			"	or	%2, %0, %3			\n"
+			"	xor	%2, %3				\n"
+			"	" __SC 	"%2, %1				\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;
@@ -499,30 +475,26 @@
 		"	beqzl	%2, 1b					\n"
 		"	and	%2, %0, %3				\n"
 		"	.set	mips0					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
+		: "=&r" (temp), "+m" (*m), "=&r" (res)
+		: "r" (1UL << bit)
 		: "memory");
 	} else if (kernel_uses_llsc) {
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
-		__asm__ __volatile__(
-		"	.set	push					\n"
-		"	.set	noreorder				\n"
-		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
-		"	xor	%2, %0, %3				\n"
-		"	" __SC	"\t%2, %1				\n"
-		"	beqz	%2, 2f					\n"
-		"	 and	%2, %0, %3				\n"
-		"	.subsection 2					\n"
-		"2:	b	1b					\n"
-		"	 nop						\n"
-		"	.previous					\n"
-		"	.set	pop					\n"
-		: "=&r" (temp), "=m" (*m), "=&r" (res)
-		: "r" (1UL << bit), "m" (*m)
-		: "memory");
+		do {
+			__asm__ __volatile__(
+			"	.set	mips3				\n"
+			"	" __LL	"%0, %1	# test_and_change_bit	\n"
+			"	xor	%2, %0, %3			\n"
+			"	" __SC	"\t%2, %1			\n"
+			"	.set	mips0				\n"
+			: "=&r" (temp), "+m" (*m), "=&r" (res)
+			: "r" (1UL << bit)
+			: "memory");
+		} while (unlikely(!res));
+
+		res = temp & (1UL << bit);
 	} else {
 		volatile unsigned long *a = addr;
 		unsigned long mask;