m68knommu: add optimize memmove() function

Add an m68k/coldfire optimized memmove() function for the m68knommu arch.
This is the same function as used by m68k. Simple speed tests show this
is faster once buffers are larger than 4 bytes, and significantly faster
on much larger buffers (4 times faster above about 100 bytes).

This also goes part of the way to fixing a regression caused by commit
ea61bc461d09e8d331a307916530aaae808c72a2 ("m68k/m68knommu: merge MMU and
non-MMU string.h"), which breaks non-coldfire non-mmu builds (which is
the 68x328 and 68360 families). They currently have no memmove() fucntion
defined, since there was none in the m68knommu/lib functions.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
diff --git a/arch/m68knommu/lib/memmove.c b/arch/m68knommu/lib/memmove.c
new file mode 100644
index 0000000..b3dcfe9
--- /dev/null
+++ b/arch/m68knommu/lib/memmove.c
@@ -0,0 +1,105 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#define __IN_STRING_C
+
+#include <linux/module.h>
+#include <linux/string.h>
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	void *xdest = dest;
+	size_t temp;
+
+	if (!n)
+		return xdest;
+
+	if (dest < src) {
+		if ((long)dest & 1) {
+			char *cdest = dest;
+			const char *csrc = src;
+			*cdest++ = *csrc++;
+			dest = cdest;
+			src = csrc;
+			n--;
+		}
+		if (n > 2 && (long)dest & 2) {
+			short *sdest = dest;
+			const short *ssrc = src;
+			*sdest++ = *ssrc++;
+			dest = sdest;
+			src = ssrc;
+			n -= 2;
+		}
+		temp = n >> 2;
+		if (temp) {
+			long *ldest = dest;
+			const long *lsrc = src;
+			temp--;
+			do
+				*ldest++ = *lsrc++;
+			while (temp--);
+			dest = ldest;
+			src = lsrc;
+		}
+		if (n & 2) {
+			short *sdest = dest;
+			const short *ssrc = src;
+			*sdest++ = *ssrc++;
+			dest = sdest;
+			src = ssrc;
+		}
+		if (n & 1) {
+			char *cdest = dest;
+			const char *csrc = src;
+			*cdest = *csrc;
+		}
+	} else {
+		dest = (char *)dest + n;
+		src = (const char *)src + n;
+		if ((long)dest & 1) {
+			char *cdest = dest;
+			const char *csrc = src;
+			*--cdest = *--csrc;
+			dest = cdest;
+			src = csrc;
+			n--;
+		}
+		if (n > 2 && (long)dest & 2) {
+			short *sdest = dest;
+			const short *ssrc = src;
+			*--sdest = *--ssrc;
+			dest = sdest;
+			src = ssrc;
+			n -= 2;
+		}
+		temp = n >> 2;
+		if (temp) {
+			long *ldest = dest;
+			const long *lsrc = src;
+			temp--;
+			do
+				*--ldest = *--lsrc;
+			while (temp--);
+			dest = ldest;
+			src = lsrc;
+		}
+		if (n & 2) {
+			short *sdest = dest;
+			const short *ssrc = src;
+			*--sdest = *--ssrc;
+			dest = sdest;
+			src = ssrc;
+		}
+		if (n & 1) {
+			char *cdest = dest;
+			const char *csrc = src;
+			*--cdest = *--csrc;
+		}
+	}
+	return xdest;
+}
+EXPORT_SYMBOL(memmove);