/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
###############################################################################
	.globl do_csum
	.type do_csum,@function
do_csum:
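	# save d2/d3, which this routine clobbers, and stash the incoming
	# arguments; buff (saved at (12,sp)) is re-read at the end to check
	# whether the buffer started on an odd address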
	movm [d2,d3],(sp)
	mov d0,(12,sp)
	mov d1,(16,sp)
	mov d1,d2 # count
	mov d0,a0 # buff
	clr d1 # accumulator

	cmp +0,d2
	beq do_csum_done # return if zero-length buffer

	# 4-byte align the buffer pointer
	btst +3,a0
	beq do_csum_now_4b_aligned

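	# the buffer starts on an odd address: fold in the first byte, placed
	# in the high half of a halfword; the byte swap at the end puts it
	# back in the right position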
	btst +1,a0
	beq do_csum_addr_not_odd
	movbu (a0),d0
	inc a0
	asl +8,d0
	add d0,d1
	addc +0,d1
	add -1,d2
do_csum_addr_not_odd:

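	# consume a leading halfword if the pointer is still only 2-byte
	# aligned and at least two bytes remain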
	cmp +2,d2
	bcs do_csum_fewer_than_4
	btst +2,a0
	beq do_csum_now_4b_aligned
	movhu (a0+),d0
	add d0,d1
	addc +0,d1
	add -2,d2
	cmp +4,d2
	bcs do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp +31,d2
	bls do_csum_remainder # 4-byte aligned remainder

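	# bias the remaining length by -32 so that the SUB at the bottom of
	# the loop produces a borrow once fewer than 32 bytes are left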
	add -32,d2
	mov +32,d3

do_csum_loop:
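	# sum eight 32-bit words per pass, chaining the carries through ADDC
	# and folding the final carry back into the accumulator with ADDC +0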
	mov (a0+),d0
	add d0,d1
	mov (a0+),e0
	addc e0,d1
	mov (a0+),e1
	addc e1,d1
	mov (a0+),e3
	addc e3,d1
	mov (a0+),d0
	addc d0,d1
	mov (a0+),e0
	addc e0,d1
	mov (a0+),e1
	addc e1,d1
	mov (a0+),e3
	addc e3,d1
	addc +0,d1

	sub d3,d2
	bcc do_csum_loop

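	# undo the -32 bias to get the true number of bytes left; if it is
	# zero we are done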
	add d3,d2
	beq do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp +16,d2
	bcs do_csum_fewer_than_16
	mov (a0+),d0
	add d0,d1
	mov (a0+),e0
	addc e0,d1
	mov (a0+),e1
	addc e1,d1
	mov (a0+),e3
	addc e3,d1
	addc +0,d1
	add -16,d2
	beq do_csum_done

do_csum_fewer_than_16:
	# checksum the remaining whole words (one, two or three of them)
	cmp +4,d2
	bcs do_csum_fewer_than_4
	cmp +8,d2
	bcs do_csum_one_word
	cmp +12,d2
	bcs do_csum_two_words
	mov (a0+),d0
	add d0,d1
	addc +0,d1
do_csum_two_words:
	mov (a0+),d0
	add d0,d1
	addc +0,d1
do_csum_one_word:
	mov (a0+),d0
	add d0,d1
	addc +0,d1

do_csum_fewer_than_4:
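	# handle the trailing 0-3 bytes; the xor_cmp below clears d0 whilst
	# checking whether at least a halfword remains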
	and +3,d2
	beq do_csum_done
	xor_cmp d0,d0,+2,d2
	bcs do_csum_fewer_than_2
	movhu (a0+),d0
do_csum_fewer_than_2:
	and +1,d2
	beq do_csum_add_last_bit
	movbu (a0),d3
	add d3,d0
do_csum_add_last_bit:
	add d0,d1
	addc +0,d1

do_csum_done:
	# compress the checksum down to 16 bits
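	# add the top and bottom 16-bit halves together in the upper half of
	# d0, fold the end-around carry back in, then shift the result down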
	mov +0xffff0000,d2
	and d1,d2
	asl +16,d1
	add d2,d1,d0
	addc +0xffff,d0
	lsr +16,d0

	# flip the halves of the word result if the buffer was oddly aligned
	mov (12,sp),d1
	and +1,d1
	beq do_csum_not_oddly_aligned
	swaph d0,d0 # exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
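	# restore the saved registers and return the folded 16-bit sum in d0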
	ret [d2,d3],8

do_csum_end:
	.size do_csum, do_csum_end-do_csum