sparc64: Fill a missing delay slot.

If the code were already aligned to 64 bytes, the wr instruction would be
executed twice --- once in the delay slot of the branch and once at the jump
target. Add an explicit nop after the branch so the delay slot is always filled.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index d24cfe1..e3b65d8 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -106,6 +106,7 @@
  */
 #define write_pic(__p)  					\
 	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t"		\
+			     " nop\n\t"				\
 			     ".align	64\n"			\
 			  "99:wr	%0, 0x0, %%pic\n\t"	\
 			     "rd	%%pic, %%g0" : : "r" (__p))