blob: 4a4609c19095f9188005e4e2e9b68d235008b079 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/arm/lib/csumpartialcopygeneric.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * unsigned int
13 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...)
14 * r0 = src, r1 = dst, r2 = len, r3 = sum
15 * Returns : r0 = checksum
16 *
17 * Note that 'tst' and 'teq' preserve the carry flag.
18 */
19
/*
 * Symbolic names for the four argument registers, matching the
 * register assignment given in the header comment: r0 = src,
 * r1 = dst, r2 = len, r3 = sum (the running checksum).
 */
20src .req r0
21dst .req r1
22len .req r2
23sum .req r3
24
/*
 * Zero-length exit, reached from .Lless8 when len == 0: the incoming
 * sum is handed back unchanged in r0.  load_regs is a macro that is
 * not defined in this part of the file; presumably it restores what
 * save_regs stacked and returns to the caller (TODO confirm).
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000025.Lzero: mov r0, sum
Linus Torvalds1da177e2005-04-16 15:20:36 -070026 load_regs ea
27
28 /*
29 * Align an unaligned destination pointer. We know that
30 * we have >= 8 bytes here, so we don't need to check
31 * the length. Note that the source pointer hasn't been
32 * aligned yet.
33 */
/*
 * Subroutine: entered with "blne" from the main entry path and left
 * with "mov pc, lr".  Copies 1, 2 or 3 bytes so that dst becomes
 * 32-bit aligned, decrementing len and accumulating each byte into
 * sum with adcs.  The carry produced by the last adcs is still live
 * on return: the only instructions after it are strb and a move to
 * pc, neither of which is flag-setting, and the caller continues the
 * carry chain.
 *
 * load1b / load2b and put_byte_0 / put_byte_1 are defined elsewhere;
 * presumably the loads fetch bytes from src with post-increment and
 * put_byte_N is the shift that places a byte in lane N of the sum
 * (TODO confirm against their definitions).
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000034.Ldst_unaligned:
35 tst dst, #1
36 beq .Ldst_16bit
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
/*
 * dst is odd: copy a single byte.  It is added with put_byte_1
 * rather than put_byte_0; .Ldone rotates the final sum by 8 bits
 * when the original dst was odd.
 */
38 load1b ip
39 sub len, len, #1
40 adcs sum, sum, ip, put_byte_1 @ update checksum
41 strb ip, [dst], #1
/* tst leaves C alone; return now if that one byte was enough. */
42 tst dst, #2
43 moveq pc, lr @ dst is now 32bit aligned
44
/*
 * dst is 16-bit but not 32-bit aligned here (either on entry or
 * after the byte above): copy two more bytes.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000045.Ldst_16bit: load2b r8, ip
Linus Torvalds1da177e2005-04-16 15:20:36 -070046 sub len, len, #2
47 adcs sum, sum, r8, put_byte_0
48 strb r8, [dst], #1
49 adcs sum, sum, ip, put_byte_1
50 strb ip, [dst], #1
51 mov pc, lr @ dst is now 32bit aligned
52
53 /*
54 * Handle 0 to 7 bytes, with any alignment of source and
55 * destination pointers. Note that when we get here, C = 0
56 */
/*
 * Reached only via "blo .Lless8" after "cmp len, #8" in the entry
 * code; blo is taken when C is clear, which is why C = 0 here.
 * Everything is copied byte-wise: an optional leading byte to make
 * dst 16-bit aligned, then byte pairs while (len & 6) != 0, then an
 * optional trailing byte.  Only adcs changes flags on the way
 * (sub has no S suffix, tst/teq preserve C), so the carry chain
 * runs unbroken into .Ldone.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000057.Lless8: teq len, #0 @ check for zero count
58 beq .Lzero
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60 /* we must have at least one byte. */
61 tst dst, #1 @ dst 16-bit aligned
Nicolas Pitre8adbb372005-11-11 21:51:49 +000062 beq .Lless8_aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -070063
64 /* Align dst */
65 load1b ip
66 sub len, len, #1
67 adcs sum, sum, ip, put_byte_1 @ update checksum
68 strb ip, [dst], #1
/* Skip the pair loop when no full byte pair remains. */
69 tst len, #6
Nicolas Pitre8adbb372005-11-11 21:51:49 +000070 beq .Lless8_byteonly
Linus Torvalds1da177e2005-04-16 15:20:36 -070071
/*
 * Pair loop: two bytes per pass.  len < 8 and drops by 2 each time,
 * so (len & 6) reaches zero after at most three passes.
 */
721: load2b r8, ip
73 sub len, len, #2
74 adcs sum, sum, r8, put_byte_0
75 strb r8, [dst], #1
76 adcs sum, sum, ip, put_byte_1
77 strb ip, [dst], #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000078.Lless8_aligned:
79 tst len, #6
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 bne 1b
/* At most one byte is left; it is added in lane 0. */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000081.Lless8_byteonly:
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 tst len, #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000083 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -070084 load1b r8
85 adcs sum, sum, r8, put_byte_0 @ update checksum
86 strb r8, [dst], #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000087 b .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
/*
 * Entry point of the copy-and-checksum routine.  FN_ENTRY, save_regs,
 * load_regs and the load*, get_byte_*, put_byte_* and push helpers
 * are not defined in this part of the file; presumably the file that
 * includes this one supplies them (TODO confirm).
 *
 * In:   r0 = src, r1 = dst, r2 = len, r3 = sum
 * Out:  r0 = checksum
 * This path uses r4-r7 and ip as scratch registers.
 *
 * The carry flag is part of the running checksum between adcs
 * instructions.  Loop counters are therefore updated with a plain
 * sub followed by teq/tst, which preserve C, and the flag-setting
 * bics/ands below use small unrotated immediates, which also leave
 * C unchanged.
 */
89FN_ENTRY
/*
 * ip keeps the entry sp and fp is derived from it after save_regs.
 * .Ldone reads the original dst from [sp, #0], so save_regs must
 * leave the saved r1 at the top of the stack (TODO confirm against
 * the save_regs definition).
 */
90 mov ip, sp
91 save_regs
92 sub fp, ip, #4
93
94 cmp len, #8 @ Ensure that we have at least
Nicolas Pitre8adbb372005-11-11 21:51:49 +000095 blo .Lless8 @ 8 bytes to copy.
Linus Torvalds1da177e2005-04-16 15:20:36 -070096
97 adds sum, sum, #0 @ C = 0
98 tst dst, #3 @ Test destination alignment
Nicolas Pitre8adbb372005-11-11 21:51:49 +000099 blne .Ldst_unaligned @ align destination, return here
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
101 /*
102 * Ok, the dst pointer is now 32bit aligned, and we know
103 * that we must have more than 4 bytes to copy. Note
104 * that C contains the carry from the dst alignment above.
105 */
106
107 tst src, #3 @ Test source alignment
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000108 bne .Lsrc_not_aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109
110 /* Routine for src & dst aligned */
111
/* ip = len rounded down to a multiple of 16; skip the loop if zero. */
112 bics ip, len, #15
113 beq 2f
114
/* Main loop: copy and sum 16 bytes (four words) per pass. */
1151: load4l r4, r5, r6, r7
116 stmia dst!, {r4, r5, r6, r7}
117 adcs sum, sum, r4
118 adcs sum, sum, r5
119 adcs sum, sum, r6
120 adcs sum, sum, r7
121 sub ip, ip, #16
122 teq ip, #0
123 bne 1b
124
/*
 * Remaining whole words: bits 3 and 2 of len select an 8-byte chunk
 * and/or a 4-byte chunk.
 */
1252: ands ip, len, #12
126 beq 4f
127 tst ip, #8
128 beq 3f
129 load2l r4, r5
130 stmia dst!, {r4, r5}
131 adcs sum, sum, r4
132 adcs sum, sum, r5
133 tst ip, #4
134 beq 4f
135
1363: load1l r4
137 str r4, [dst], #4
138 adcs sum, sum, r4
139
/*
 * Tail of 1-3 bytes: fetch one more source word into r4 and peel
 * bytes off it.  r5 always holds the next byte to store.  When bit 1
 * of len is set, the first two bytes are summed in one step via
 * "r4, push #16" and stored individually, leaving byte 2 in r5.
 */
1404: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000141 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 load1l r4
143 tst len, #2
144 mov r5, r4, get_byte_0
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000145 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 adcs sum, sum, r4, push #16
147 strb r5, [dst], #1
148 mov r5, r4, get_byte_1
149 strb r5, [dst], #1
150 mov r5, r4, get_byte_2
/*
 * Shared tail, also branched to from the unaligned-source paths:
 * if bit 0 of len is set, store the byte in r5 and add it (masked
 * to 8 bits) in lane 0.  All three instructions are conditional on
 * the tst result.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000151.Lexit: tst len, #1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 strneb r5, [dst], #1
153 andne r5, r5, #255
154 adcnes sum, sum, r5, put_byte_0
155
156 /*
157 * If the dst pointer was not 16-bit aligned, we
158 * need to rotate the checksum here to get around
159 * the inefficient byte manipulations in the
160 * architecture independent code.
161 */
/*
 * Common exit: fold the final carry into the result, then reuse the
 * sum register to fetch the original dst from the stack and rotate
 * the result by 8 bits if that address was odd.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000162.Ldone: adc r0, sum, #0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 ldr sum, [sp, #0] @ dst
164 tst sum, #1
165 movne r0, r0, ror #8
166 load_regs ea
167
/*
 * dst is 32-bit aligned but src is not.  The pending carry is folded
 * into sum, src is rounded down to a word boundary and the first
 * word is loaded into r5.  The byte offset (src & 3) then selects
 * one of three variants: offset 2 -> .Lsrc2_aligned, offset 3 ->
 * .Lsrc3_aligned, offset 1 -> the code that follows here.
 *
 * For offset 1 the "cmp ip, #2" itself leaves C = 0 (1 < 2), so no
 * explicit clearing is needed before the adcs chain.
 *
 * push / pull are macros defined elsewhere; presumably they are the
 * endian-dependent shifts that move bytes towards / away from the
 * position where the next source word is merged in (TODO confirm).
 * r4 always carries the not-yet-written bytes of the previous
 * source word; each output word is r4 merged with the leading part
 * of the next source word.  r8 is used as an extra scratch register.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000168.Lsrc_not_aligned:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 adc sum, sum, #0 @ include C from dst alignment
170 and ip, src, #3
171 bic src, src, #3
172 load1l r5
173 cmp ip, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000174 beq .Lsrc2_aligned
175 bhi .Lsrc3_aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 mov r4, r5, pull #8 @ C = 0
/* ip = len rounded down to a multiple of 16; skip the loop if zero. */
177 bics ip, len, #15
178 beq 2f
/*
 * Main loop: read four source words, build four destination words
 * by shifting and merging, store and sum them; the leftover of r8
 * becomes the new r4.
 */
1791: load4l r5, r6, r7, r8
180 orr r4, r4, r5, push #24
181 mov r5, r5, pull #8
182 orr r5, r5, r6, push #24
183 mov r6, r6, pull #8
184 orr r6, r6, r7, push #24
185 mov r7, r7, pull #8
186 orr r7, r7, r8, push #24
187 stmia dst!, {r4, r5, r6, r7}
188 adcs sum, sum, r4
189 adcs sum, sum, r5
190 adcs sum, sum, r6
191 adcs sum, sum, r7
192 mov r4, r8, pull #8
193 sub ip, ip, #16
194 teq ip, #0
195 bne 1b
/* Remaining whole words: an 8-byte and/or a 4-byte chunk. */
1962: ands ip, len, #12
197 beq 4f
198 tst ip, #8
199 beq 3f
200 load2l r5, r6
201 orr r4, r4, r5, push #24
202 mov r5, r5, pull #8
203 orr r5, r5, r6, push #24
204 stmia dst!, {r4, r5}
205 adcs sum, sum, r4
206 adcs sum, sum, r5
207 mov r4, r6, pull #8
208 tst ip, #4
209 beq 4f
2103: load1l r5
211 orr r4, r4, r5, push #24
212 str r4, [dst], #4
213 adcs sum, sum, r4
214 mov r4, r5, pull #8
/*
 * Tail of 1-3 bytes, all taken from the leftover in r4 without a
 * further load.  Same shape as the aligned tail: two bytes are
 * summed via "r4, push #16" when bit 1 of len is set, and .Lexit
 * handles the final odd byte held in r5.
 */
2154: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000216 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 mov r5, r4, get_byte_0
218 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000219 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 adcs sum, sum, r4, push #16
221 strb r5, [dst], #1
222 mov r5, r4, get_byte_1
223 strb r5, [dst], #1
224 mov r5, r4, get_byte_2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000225 b .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226
/*
 * Source offset 2 within its word.  Entered from .Lsrc_not_aligned
 * with the first (rounded-down) source word in r5; r4 receives its
 * leftover via "pull #16".  The "cmp ip, #2" that selected this path
 * compared equal and so left C set, hence the "adds sum, sum, #0"
 * to clear C before the adcs chain starts.
 *
 * Same structure as the offset-1 path, with 16/16 shift amounts:
 * 16-byte loop, then 8- and 4-byte chunks, then a byte tail.
 * push / pull are macros defined elsewhere (TODO confirm their
 * exact shift direction per endianness).
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000227.Lsrc2_aligned: mov r4, r5, pull #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 adds sum, sum, #0
229 bics ip, len, #15
230 beq 2f
/* Main loop: 16 bytes per pass, merging adjacent source words. */
2311: load4l r5, r6, r7, r8
232 orr r4, r4, r5, push #16
233 mov r5, r5, pull #16
234 orr r5, r5, r6, push #16
235 mov r6, r6, pull #16
236 orr r6, r6, r7, push #16
237 mov r7, r7, pull #16
238 orr r7, r7, r8, push #16
239 stmia dst!, {r4, r5, r6, r7}
240 adcs sum, sum, r4
241 adcs sum, sum, r5
242 adcs sum, sum, r6
243 adcs sum, sum, r7
244 mov r4, r8, pull #16
245 sub ip, ip, #16
246 teq ip, #0
247 bne 1b
/* Remaining whole words: an 8-byte and/or a 4-byte chunk. */
2482: ands ip, len, #12
249 beq 4f
250 tst ip, #8
251 beq 3f
252 load2l r5, r6
253 orr r4, r4, r5, push #16
254 mov r5, r5, pull #16
255 orr r5, r5, r6, push #16
256 stmia dst!, {r4, r5}
257 adcs sum, sum, r4
258 adcs sum, sum, r5
259 mov r4, r6, pull #16
260 tst ip, #4
261 beq 4f
2623: load1l r5
263 orr r4, r4, r5, push #16
264 str r4, [dst], #4
265 adcs sum, sum, r4
266 mov r4, r5, pull #16
/*
 * Tail of 1-3 bytes.  The leftover in r4 covers only two bytes, so
 * when bit 1 of len is set r4 is added to the sum as a whole and
 * both bytes are stored; a third byte, if needed, is fetched
 * separately with load1b before joining the shared .Lexit tail
 * (which re-tests bit 0 of len).
 */
2674: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000268 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 mov r5, r4, get_byte_0
270 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000271 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 adcs sum, sum, r4
273 strb r5, [dst], #1
274 mov r5, r4, get_byte_1
275 strb r5, [dst], #1
276 tst len, #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000277 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 load1b r5
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000279 b .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
/*
 * Source offset 3 within its word.  Entered from .Lsrc_not_aligned
 * with the first (rounded-down) source word in r5; r4 receives its
 * leftover via "pull #24".  The "cmp ip, #2" that selected this path
 * left C set (3 > 2), hence the "adds sum, sum, #0" to clear C
 * before the adcs chain starts.
 *
 * Same structure as the other unaligned-source paths, with 8/24
 * shift amounts: 16-byte loop, then 8- and 4-byte chunks, then a
 * byte tail.  push / pull are macros defined elsewhere (TODO confirm
 * their exact shift direction per endianness).
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000281.Lsrc3_aligned: mov r4, r5, pull #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 adds sum, sum, #0
283 bics ip, len, #15
284 beq 2f
/* Main loop: 16 bytes per pass, merging adjacent source words. */
2851: load4l r5, r6, r7, r8
286 orr r4, r4, r5, push #8
287 mov r5, r5, pull #24
288 orr r5, r5, r6, push #8
289 mov r6, r6, pull #24
290 orr r6, r6, r7, push #8
291 mov r7, r7, pull #24
292 orr r7, r7, r8, push #8
293 stmia dst!, {r4, r5, r6, r7}
294 adcs sum, sum, r4
295 adcs sum, sum, r5
296 adcs sum, sum, r6
297 adcs sum, sum, r7
298 mov r4, r8, pull #24
299 sub ip, ip, #16
300 teq ip, #0
301 bne 1b
/* Remaining whole words: an 8-byte and/or a 4-byte chunk. */
3022: ands ip, len, #12
303 beq 4f
304 tst ip, #8
305 beq 3f
306 load2l r5, r6
307 orr r4, r4, r5, push #8
308 mov r5, r5, pull #24
309 orr r5, r5, r6, push #8
310 stmia dst!, {r4, r5}
311 adcs sum, sum, r4
312 adcs sum, sum, r5
313 mov r4, r6, pull #24
314 tst ip, #4
315 beq 4f
3163: load1l r5
317 orr r4, r4, r5, push #8
318 str r4, [dst], #4
319 adcs sum, sum, r4
320 mov r4, r5, pull #24
/*
 * Tail of 1-3 bytes.  The leftover in r4 covers only one byte.  When
 * bit 1 of len is set that byte is stored and summed, then a fresh
 * source word is loaded: its byte 0 is stored and summed via
 * "push #24", and its byte 1 is left in r5 for the shared .Lexit
 * tail, which stores and sums it only if bit 0 of len is set.
 */
3214: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000322 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 mov r5, r4, get_byte_0
324 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000325 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 strb r5, [dst], #1
327 adcs sum, sum, r4
328 load1l r4
329 mov r5, r4, get_byte_0
330 strb r5, [dst], #1
331 adcs sum, sum, r4, push #24
332 mov r5, r4, get_byte_1
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000333 b .Lexit