arch/sparc: Optimized memcpy, memset, copy_to_user, copy_from_user for M7/M8
New algorithm that takes advantage of the M7/M8 block init store
ASI, ie, overlapping pipelines and miss buffer filling.
Full details in code comments.
Signed-off-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/lib/M7patch.S b/arch/sparc/lib/M7patch.S
new file mode 100644
index 0000000..9000b7b
--- /dev/null
+++ b/arch/sparc/lib/M7patch.S
@@ -0,0 +1,51 @@
+/*
+ * M7patch.S: Patch generic routines with M7 variant.
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define NG_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ENTRY(m7_patch_copyops)
+ NG_DO_PATCH(memcpy, M7memcpy)
+ NG_DO_PATCH(raw_copy_from_user, M7copy_from_user)
+ NG_DO_PATCH(raw_copy_to_user, M7copy_to_user)
+ retl
+ nop
+ENDPROC(m7_patch_copyops)
+
+ENTRY(m7_patch_bzero)
+ NG_DO_PATCH(memset, M7memset)
+ NG_DO_PATCH(__bzero, M7bzero)
+ NG_DO_PATCH(__clear_user, NGclear_user)
+ NG_DO_PATCH(tsb_init, NGtsb_init)
+ retl
+ nop
+ENDPROC(m7_patch_bzero)
+
+ENTRY(m7_patch_pageops)
+ NG_DO_PATCH(copy_user_page, NG4copy_user_page)
+ NG_DO_PATCH(_clear_page, M7clear_page)
+ NG_DO_PATCH(clear_user_page, M7clear_user_page)
+ retl
+ nop
+ENDPROC(m7_patch_pageops)