[SPARC64]: Need to clobber global reg vars in switch_to().

Otherwise the compiler can't see that things like the
per-cpu area base register are changing.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index caf8750..a1f53a4 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -3,6 +3,8 @@
 
 #include <linux/compiler.h>
 
+register unsigned long __local_per_cpu_offset asm("g5");
+
 #ifdef CONFIG_SMP
 
 #define setup_per_cpu_areas()			do { } while (0)
@@ -23,8 +25,6 @@
     __typeof__(type) per_cpu__##name				\
     ____cacheline_aligned_in_smp
 
-register unsigned long __local_per_cpu_offset asm("g5");
-
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
 #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset))
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index 64891cb..3f175fa 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -141,7 +141,6 @@
 	 * not preserve it's value.  Hairy, but it lets us remove 2 loads
 	 * and 2 stores in this critical code path.  -DaveM
 	 */
-#define EXTRA_CLOBBER ,"%l1"
 #define switch_to(prev, next, last)					\
 do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 		unsigned long __tmp;					\
@@ -164,33 +163,34 @@
 	"stx	%%i6, [%%sp + 2047 + 0x70]\n\t"				\
 	"stx	%%i7, [%%sp + 2047 + 0x78]\n\t"				\
 	"rdpr	%%wstate, %%o5\n\t"					\
-	"stx	%%o6, [%%g6 + %3]\n\t"					\
-	"stb	%%o5, [%%g6 + %2]\n\t"					\
-	"rdpr	%%cwp, %%o5\n\t"					\
+	"stx	%%o6, [%%g6 + %6]\n\t"					\
 	"stb	%%o5, [%%g6 + %5]\n\t"					\
-	"mov	%1, %%g6\n\t"						\
-	"ldub	[%1 + %5], %%g1\n\t"					\
+	"rdpr	%%cwp, %%o5\n\t"					\
+	"stb	%%o5, [%%g6 + %8]\n\t"					\
+	"mov	%4, %%g6\n\t"						\
+	"ldub	[%4 + %8], %%g1\n\t"					\
 	"wrpr	%%g1, %%cwp\n\t"					\
-	"ldx	[%%g6 + %3], %%o6\n\t"					\
-	"ldub	[%%g6 + %2], %%o5\n\t"					\
-	"ldub	[%%g6 + %4], %%o7\n\t"					\
+	"ldx	[%%g6 + %6], %%o6\n\t"					\
+	"ldub	[%%g6 + %5], %%o5\n\t"					\
+	"ldub	[%%g6 + %7], %%o7\n\t"					\
 	"wrpr	%%o5, 0x0, %%wstate\n\t"				\
 	"ldx	[%%sp + 2047 + 0x70], %%i6\n\t"				\
 	"ldx	[%%sp + 2047 + 0x78], %%i7\n\t"				\
-	"ldx	[%%g6 + %6], %%g4\n\t"					\
+	"ldx	[%%g6 + %9], %%g4\n\t"					\
 	"brz,pt %%o7, 1f\n\t"						\
 	" mov	%%g7, %0\n\t"						\
 	"b,a ret_from_syscall\n\t"					\
 	"1:\n\t"							\
-	: "=&r" (last)							\
+	: "=&r" (last), "=r" (current), "=r" (current_thread_info_reg),	\
+	  "=r" (__local_per_cpu_offset)					\
 	: "0" (task_thread_info(next)),					\
 	  "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_NEW_CHILD),            \
 	  "i" (TI_CWP), "i" (TI_TASK)					\
 	: "cc",								\
 	        "g1", "g2", "g3",                   "g7",		\
-	              "l2", "l3", "l4", "l5", "l6", "l7",		\
+	        "l1", "l2", "l3", "l4", "l5", "l6", "l7",		\
 	  "i0", "i1", "i2", "i3", "i4", "i5",				\
-	  "o0", "o1", "o2", "o3", "o4", "o5",       "o7" EXTRA_CLOBBER);\
+	  "o0", "o1", "o2", "o3", "o4", "o5",       "o7");		\
 	/* If you fuck with this, update ret_from_syscall code too. */	\
 	if (test_thread_flag(TIF_PERFCTR)) {				\
 		write_pcr(current_thread_info()->pcr_reg);		\