raid5: add AVX optimized RAID5 checksumming

Optimize RAID5 xor checksumming by taking advantage of 256-bit YMM registers introduced in AVX.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>

commit: ea4d26ae24e58fbd2c61de9242adab053cb982d8 [log] [tgz]
author: Jim Kukunas <james.t.kukunas@linux.intel.com> Tue May 22 13:54:04 2012 +1000
committer: NeilBrown <neilb@suse.de> Tue May 22 13:54:04 2012 +1000
tree: 3115dd168f0cf1eb1eb5dd6aecc385cfa0e8bc05
parent: 56a519913eeba2bdae4d7ee39e80fab442c3836c [diff] [blame]
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 133b40a..4545708 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h

@@ -861,6 +861,9 @@
 	.do_5 = xor_sse_5,
 };
 
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
 
@@ -871,6 +874,7 @@
 	xor_speed(&xor_block_8regs_p);			\
 	xor_speed(&xor_block_32regs);			\
 	xor_speed(&xor_block_32regs_p);			\
+	AVX_XOR_SPEED;					\
 	if (cpu_has_xmm)				\
 		xor_speed(&xor_block_pIII_sse);		\
 	if (cpu_has_mmx) {				\
@@ -883,6 +887,6 @@
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
 #define XOR_SELECT_TEMPLATE(FASTEST)			\
-	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+	AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
 
 #endif /* _ASM_X86_XOR_32_H */
commit	ea4d26ae24e58fbd2c61de9242adab053cb982d8	[log] [tgz]
author	Jim Kukunas <james.t.kukunas@linux.intel.com>	Tue May 22 13:54:04 2012 +1000
committer	NeilBrown <neilb@suse.de>	Tue May 22 13:54:04 2012 +1000
tree	3115dd168f0cf1eb1eb5dd6aecc385cfa0e8bc05
parent	56a519913eeba2bdae4d7ee39e80fab442c3836c [diff] [blame]