[SPARC64]: Niagara copy/clear page.

Happily we have no D-cache aliasing issues on these
chips, so the implementation is very straightforward.

Add a stub in bootup which will be where the patching
calls will be made for niagara/sun4v/hypervisor.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 03fc0b5..f04f739 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -316,6 +316,31 @@
 	ba,pt	%xcc, spitfire_tlb_fixup
 	 nop
 
+	/* XXX Nothing branches to here yet, when %ver register indicates
+	 * XXX Niagara we should do this.
+	 */
+niagara_tlb_fixup:
+	mov	3, %g2		/* Set TLB type to hypervisor. */
+	sethi	%hi(tlb_type), %g1
+	stw	%g2, [%g1 + %lo(tlb_type)]
+
+	/* Patch copy/clear ops.  */
+	call	niagara_patch_copyops
+	 nop
+	call	niagara_patch_pageops
+	 nop
+
+	/* Patch TLB/cache ops.  */
+	call	hypervisor_patch_cachetlbops
+	 nop
+
+	/* Do not fall through into the cheetah fixup, which selects a
+	 * cheetah TLB type.  NOTE(review): tlb_fixup_done is defined
+	 * outside this hunk; confirm it is the common exit label.
+	 */
+	ba,pt	%xcc, tlb_fixup_done
+	 nop
+
 cheetah_tlb_fixup:
 	mov	2, %g2		/* Set TLB type to cheetah+. */
 	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 813f622..3d0e9a2 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -12,6 +12,7 @@
 	 U1memcpy.o U1copy_from_user.o U1copy_to_user.o \
 	 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
 	 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
+	 NGpage.o \
 	 copy_in_user.o user_fixup.o memmove.o \
 	 mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o
 
diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S
new file mode 100644
index 0000000..0e6152c
--- /dev/null
+++ b/arch/sparc64/lib/NGpage.S
@@ -0,0 +1,114 @@
+/* NGpage.S: Niagara optimized clear and copy page.
+ *
+ * Copyright (C) 2006 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+	.text
+	.align	32
+
+	/* This is heavily simplified from the sun4u variants
+	 * because Niagara does not have any D-cache aliasing issues
+	 * and also we don't need to use the FPU in order to implement
+	 * an optimal page copy/clear.
+	 *
+	 * Both routines are leaf functions that handle 64 bytes per
+	 * loop iteration.  %g1-%g3 hold the offsets 8, 16 and 24, and
+	 * %g7 counts down the bytes of the page still to be done.
+	 */
+
+NGcopy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
+	prefetch	[%o1 + 0x00], #one_read
+	mov		8, %g1
+	mov		16, %g2
+	mov		24, %g3
+	set		PAGE_SIZE, %g7
+
+	/* The loads reuse %o2-%o5 as data registers, so the vaddr
+	 * argument in %o2 is overwritten; it is never read here.
+	 */
+1:	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
+	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
+	prefetch	[%o1 + 0x40], #one_read
+	add		%o1, 32, %o1
+	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+	ldda		[%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2
+	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	ldda		[%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4
+	add		%o1, 32, %o1
+	add		%o0, 32, %o0
+	stxa		%o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	subcc		%g7, 64, %g7
+	bne,pt		%xcc, 1b
+	 add		%o0, 32, %o0
+	/* Block-init stores are weakly ordered; sync before return. */
+	membar		#Sync
+	retl
+	 nop
+
+NGclear_page:		/* %o0=dest */
+NGclear_user_page:	/* %o0=dest, %o1=vaddr */
+	mov		8, %g1
+	mov		16, %g2
+	mov		24, %g3
+	set		PAGE_SIZE, %g7
+
+1:	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	add		%o0, 32, %o0
+	stxa		%g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	subcc		%g7, 64, %g7
+	bne,pt		%xcc, 1b
+	 add		%o0, 32, %o0
+	/* Block-init stores are weakly ordered; sync before return. */
+	membar		#Sync
+	retl
+	 nop
+
+/* NG_DO_PATCH(OLD, NEW) overwrites the first two instructions at OLD
+ * with "ba,pt %xcc, NEW; nop" and then issues a flush for OLD.
+ * NEW - OLD is turned into a word displacement and clipped to the
+ * 19-bit displacement field (sll 11 / srl 11 + 2), so a backward
+ * (negative) distance cannot spill into the opcode bits.
+ * Clobbers %g1, %g2 and %g3.
+ */
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara_patch_pageops
+	.type	niagara_patch_pageops,#function
+niagara_patch_pageops:
+	NG_DO_PATCH(copy_user_page, NGcopy_user_page)
+	NG_DO_PATCH(_clear_page, NGclear_page)
+	NG_DO_PATCH(clear_user_page, NGclear_user_page)
+	retl
+	 nop
+	.size	niagara_patch_pageops,.-niagara_patch_pageops