[PATCH] Add faster __iowrite32_copy routine for x86_64

This assembly version is measurably faster than the generic version in
lib/iomap_copy.c.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
new file mode 100644
index 0000000..8bbade5
--- /dev/null
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2006 PathScale, Inc.  All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/*
+ * Override of the generic lib/iomap_copy.c version: copy %edx dwords, %rsi -> %rdi.
+ */
+	.globl __iowrite32_copy
+	.p2align 4
+__iowrite32_copy:
+	movl %edx,%ecx		/* count = low 32 bits of %rdx, zero-extended into %rcx */
+	rep movsl		/* 4 bytes per step; assumes DF clear on entry (confirm) */
+	ret