Blackfin arch: Faster Implementation of csum_tcpudp_nofold()
Avoid conditional branch instructions during carry bit additions: copy the
carry flag (AC0, via CC) into a data register and add it unconditionally.
Special thanks to Bernd.
Simplify: use ((len + proto) << 8), like every other __LITTLE_ENDIAN__ machine.
Cc: Bernd Schmidt <bernds_cb1@t-online.de>
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
diff --git a/arch/blackfin/include/asm/checksum.h b/arch/blackfin/include/asm/checksum.h
index f67289a..793581f 100644
--- a/arch/blackfin/include/asm/checksum.h
+++ b/arch/blackfin/include/asm/checksum.h
@@ -63,23 +63,23 @@
csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
unsigned short proto, __wsum sum)
{
+ unsigned int carry;
- __asm__ ("%0 = %0 + %1;\n\t"
- "CC = AC0;\n\t"
- "if !CC jump 4;\n\t"
- "%0 = %0 + %4;\n\t"
- "%0 = %0 + %2;\n\t"
- "CC = AC0;\n\t"
- "if !CC jump 4;\n\t"
- "%0 = %0 + %4;\n\t"
- "%0 = %0 + %3;\n\t"
- "CC = AC0;\n\t"
- "if !CC jump 4;\n\t"
- "%0 = %0 + %4;\n\t"
- "NOP;\n\t"
- : "=d" (sum)
- : "d" (daddr), "d" (saddr), "d" ((ntohs(len)<<16)+proto*256), "d" (1), "0"(sum)
- : "CC");
+ __asm__ ("%0 = %0 + %2;\n\t"
+ "CC = AC0;\n\t"
+ "%1 = CC;\n\t"
+ "%0 = %0 + %1;\n\t"
+ "%0 = %0 + %3;\n\t"
+ "CC = AC0;\n\t"
+ "%1 = CC;\n\t"
+ "%0 = %0 + %1;\n\t"
+ "%0 = %0 + %4;\n\t"
+ "CC = AC0;\n\t"
+ "%1 = CC;\n\t"
+ "%0 = %0 + %1;\n\t"
+ : "=d" (sum), "=&d" (carry)
+ : "d" (daddr), "d" (saddr), "d" ((len + proto) << 8), "0"(sum)
+ : "CC");
return (sum);
}