blob: 10b45909610ca6f4ca6f6f8bdc664b79c2f2bd6f [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/arm/lib/csumpartialcopygeneric.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
Russell King6ebbf2c2014-06-30 16:29:12 +010010#include <asm/assembler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070011
12/*
13 * unsigned int
14 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
15 * r0 = src, r1 = dst, r2 = len, r3 = sum
16 * Returns : r0 = checksum
17 *
18 * Note that 'tst' and 'teq' preserve the carry flag.
19 */
20
/*
 * Symbolic names for the four argument registers used throughout this
 * file: src and dst are the copy pointers, len is the remaining byte
 * count and sum is the running checksum accumulator.
 */
21src .req r0
22dst .req r1
23len .req r2
24sum .req r3
25
/*
 * Zero-length exit, reached from .Lless8 when len is 0: the incoming
 * sum is handed back unchanged in r0.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000026.Lzero: mov r0, sum
/*
 * load_regs is a macro defined outside this file; presumably it restores
 * what save_regs pushed and returns to the caller - confirm.
 */
Catalin Marinas90303b12006-01-12 16:53:51 +000027 load_regs
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
29 /*
30 * Align an unaligned destination pointer. We know that
31 * we have >= 8 bytes here, so we don't need to check
32 * the length. Note that the source pointer hasn't been
33 * aligned yet.
34 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000035.Ldst_unaligned:
/*
 * Subroutine: entered with blne from the function body and left through
 * lr. It modifies dst, len, sum, ip and possibly r8. load1b, load2b,
 * put_byte_0 and put_byte_1 are macros defined outside this file
 * (presumably byte loads from src and byte-lane shifts - confirm).
 */
/* Bit 0 of dst clear: dst is already 16-bit aligned, skip the single byte step. */
36 tst dst, #1
37 beq .Ldst_16bit
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
/* dst is odd: move one byte so that dst becomes 16-bit aligned. */
39 load1b ip
/* sub has no s suffix, so the flags (and the carry feeding adcs) are untouched. */
40 sub len, len, #1
41 adcs sum, sum, ip, put_byte_1 @ update checksum
42 strb ip, [dst], #1
/* Test bit 1 of the advanced dst; return early when it is clear. */
43 tst dst, #2
Russell King6ebbf2c2014-06-30 16:29:12 +010044 reteq lr @ dst is now 32bit aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
/* dst is 16-bit but not 32-bit aligned: move two more bytes, then return. */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000046.Ldst_16bit: load2b r8, ip
Linus Torvalds1da177e2005-04-16 15:20:36 -070047 sub len, len, #2
48 adcs sum, sum, r8, put_byte_0
49 strb r8, [dst], #1
50 adcs sum, sum, ip, put_byte_1
51 strb ip, [dst], #1
Russell King6ebbf2c2014-06-30 16:29:12 +010052 ret lr @ dst is now 32bit aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
54 /*
55 * Handle 0 to 7 bytes, with any alignment of source and
56 * destination pointers. Note that when we get here, C = 0
57 */
/*
 * Short-copy path, taken from the function entry when len is below 8.
 * Everything is moved one or two bytes at a time. Only tst, teq and
 * flag-preserving sub are used between the adcs instructions, so the
 * carry chain stays intact until .Ldone.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +000058.Lless8: teq len, #0 @ check for zero count
59 beq .Lzero
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
61 /* we must have at least one byte. */
62 tst dst, #1 @ dst 16-bit aligned
Nicolas Pitre8adbb372005-11-11 21:51:49 +000063 beq .Lless8_aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
65 /* Align dst */
66 load1b ip
67 sub len, len, #1
68 adcs sum, sum, ip, put_byte_1 @ update checksum
69 strb ip, [dst], #1
/* No bits 1 or 2 left in len means no byte pairs remain: go to the final byte check. */
70 tst len, #6
Nicolas Pitre8adbb372005-11-11 21:51:49 +000071 beq .Lless8_byteonly
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
/* Pair loop: two bytes per pass, len reduced by 2 each time. */
731: load2b r8, ip
74 sub len, len, #2
75 adcs sum, sum, r8, put_byte_0
76 strb r8, [dst], #1
77 adcs sum, sum, ip, put_byte_1
78 strb ip, [dst], #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000079.Lless8_aligned:
/* Loop while len still has bit 1 or bit 2 set (at least two bytes left). */
80 tst len, #6
Linus Torvalds1da177e2005-04-16 15:20:36 -070081 bne 1b
Nicolas Pitre8adbb372005-11-11 21:51:49 +000082.Lless8_byteonly:
/* Bit 0 of len set: one last byte to move before finishing. */
83 tst len, #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000084 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 load1b r8
86 adcs sum, sum, r8, put_byte_0 @ update checksum
87 strb r8, [dst], #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +000088 b .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
90FN_ENTRY
/*
 * Function body. FN_ENTRY, FN_EXIT, save_regs, load_regs and the load
 * macros (load1b, load1l, load2l, load4l) are defined outside this file.
 * Presumably the loadNl forms read N 32-bit words from src and advance
 * it - confirm against their definitions.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070091 save_regs
Linus Torvalds1da177e2005-04-16 15:20:36 -070092
93 cmp len, #8 @ Ensure that we have at least
Nicolas Pitre8adbb372005-11-11 21:51:49 +000094 blo .Lless8 @ 8 bytes to copy.
Linus Torvalds1da177e2005-04-16 15:20:36 -070095
/* Adding zero cannot carry out, so the adcs chain starts with C clear. */
96 adds sum, sum, #0 @ C = 0
97 tst dst, #3 @ Test destination alignment
Nicolas Pitre8adbb372005-11-11 21:51:49 +000098 blne .Ldst_unaligned @ align destination, return here
Linus Torvalds1da177e2005-04-16 15:20:36 -070099
100 /*
101 * Ok, the dst pointer is now 32bit aligned, and we know
102 * that we must have more than 4 bytes to copy. Note
103 * that C contains the carry from the dst alignment above.
104 */
105
106 tst src, #3 @ Test source alignment
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000107 bne .Lsrc_not_aligned
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
109 /* Routine for src & dst aligned */
110
/* ip = len with its low four bits cleared: the bytes covered by whole 16-byte chunks. */
111 bics ip, len, #15
112 beq 2f
113
/*
 * 16 bytes per pass. The counter is stepped with sub and tested with
 * teq so the carry left by the last adcs reaches the next pass.
 */
1141: load4l r4, r5, r6, r7
115 stmia dst!, {r4, r5, r6, r7}
116 adcs sum, sum, r4
117 adcs sum, sum, r5
118 adcs sum, sum, r6
119 adcs sum, sum, r7
120 sub ip, ip, #16
121 teq ip, #0
122 bne 1b
123
/* ip = len masked with 12: an optional 8-byte piece followed by an optional 4-byte piece. */
1242: ands ip, len, #12
125 beq 4f
126 tst ip, #8
127 beq 3f
128 load2l r4, r5
129 stmia dst!, {r4, r5}
130 adcs sum, sum, r4
131 adcs sum, sum, r5
132 tst ip, #4
133 beq 4f
134
1353: load1l r4
136 str r4, [dst], #4
137 adcs sum, sum, r4
138
/*
 * len = len masked with 3: the 1 to 3 trailing bytes. They are taken out
 * of r4 after one more load1l (presumably a full word read - confirm).
 * get_byte_N and lspush are shift macros defined outside this file.
 */
1394: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000140 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 load1l r4
142 tst len, #2
/* mov leaves the flags alone, so beq below still acts on the tst result. */
143 mov r5, r4, get_byte_0
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000144 beq .Lexit
/* At least two bytes: sum the shifted word, store two bytes, leave the third byte in r5. */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100145 adcs sum, sum, r4, lspush #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 strb r5, [dst], #1
147 mov r5, r4, get_byte_1
148 strb r5, [dst], #1
149 mov r5, r4, get_byte_2
/*
 * Shared tail, also branched to from the unaligned-source paths with the
 * candidate last byte in r5. When bit 0 of len is set, that byte is
 * stored, masked to 8 bits and added to sum; otherwise all three
 * conditional instructions are skipped.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000150.Lexit: tst len, #1
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 strneb r5, [dst], #1
152 andne r5, r5, #255
153 adcnes sum, sum, r5, put_byte_0
154
155 /*
156 * If the dst pointer was not 16-bit aligned, we
157 * need to rotate the checksum here to get around
158 * the inefficient byte manipulations in the
159 * architecture independent code.
160 */
/*
 * adc folds the outstanding carry into the result in r0. The word at the
 * top of the stack is read back as the original dst (this relies on the
 * save_regs layout, which is defined elsewhere - confirm); sum is reused
 * as scratch for it.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000161.Ldone: adc r0, sum, #0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 ldr sum, [sp, #0] @ dst
163 tst sum, #1
164 movne r0, r0, ror #8
Catalin Marinas90303b12006-01-12 16:53:51 +0000165 load_regs
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
/*
 * dst is 32-bit aligned but src is not. src is rounded down to a word
 * boundary and words are recombined with the lspull / lspush shift
 * macros from asm/assembler.h (presumably endian-dependent logical
 * shifts - confirm). r4 carries the bytes left over from the previous
 * word into the next output word.
 */
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000167.Lsrc_not_aligned:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 adc sum, sum, #0 @ include C from dst alignment
/* ip = byte offset of src inside its word (1, 2 or 3 here). */
169 and ip, src, #3
170 bic src, src, #3
171 load1l r5
/* Dispatch on the offset: 2 and 3 have their own paths, 1 falls through. */
172 cmp ip, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000173 beq .Lsrc2_aligned
174 bhi .Lsrc3_aligned
/* Offset 1: the cmp above borrowed (1 is below 2), which is why C is clear here. */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100175 mov r4, r5, lspull #8 @ C = 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 bics ip, len, #15
177 beq 2f
/* 16 bytes per pass: merge each new word with the leftover of the previous one. */
1781: load4l r5, r6, r7, r8
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100179 orr r4, r4, r5, lspush #24
180 mov r5, r5, lspull #8
181 orr r5, r5, r6, lspush #24
182 mov r6, r6, lspull #8
183 orr r6, r6, r7, lspush #24
184 mov r7, r7, lspull #8
185 orr r7, r7, r8, lspush #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 stmia dst!, {r4, r5, r6, r7}
187 adcs sum, sum, r4
188 adcs sum, sum, r5
189 adcs sum, sum, r6
190 adcs sum, sum, r7
/* Keep the leftover of the last loaded word for the next pass or the tail. */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100191 mov r4, r8, lspull #8
/* sub and teq keep the carry from the last adcs alive across the loop branch. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 sub ip, ip, #16
193 teq ip, #0
194 bne 1b
/* Optional 8-byte piece, then optional 4-byte piece, selected by len masked with 12. */
1952: ands ip, len, #12
196 beq 4f
197 tst ip, #8
198 beq 3f
199 load2l r5, r6
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100200 orr r4, r4, r5, lspush #24
201 mov r5, r5, lspull #8
202 orr r5, r5, r6, lspush #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203 stmia dst!, {r4, r5}
204 adcs sum, sum, r4
205 adcs sum, sum, r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100206 mov r4, r6, lspull #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 tst ip, #4
208 beq 4f
2093: load1l r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100210 orr r4, r4, r5, lspush #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 str r4, [dst], #4
212 adcs sum, sum, r4
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100213 mov r4, r5, lspull #8
/*
 * Trailing 1 to 3 bytes. No further load is issued on this path: the
 * bytes are extracted from the leftover already held in r4.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002144: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000215 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 mov r5, r4, get_byte_0
217 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000218 beq .Lexit
/* Two or three bytes: sum the shifted leftover, store two bytes, hand the third to .Lexit in r5. */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100219 adcs sum, sum, r4, lspush #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 strb r5, [dst], #1
221 mov r5, r4, get_byte_1
222 strb r5, [dst], #1
223 mov r5, r4, get_byte_2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000224 b .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
/*
 * Source offset 2 within its word. Same structure as the offset 1 path,
 * with 16-bit shifts on both sides. Entered from .Lsrc_not_aligned with
 * the first aligned word in r5.
 */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100226.Lsrc2_aligned: mov r4, r5, lspull #16
/* The dispatching cmp left C set on this path; adding zero clears it before the adcs chain. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 adds sum, sum, #0
228 bics ip, len, #15
229 beq 2f
/* 16 bytes per pass: merge each new word with the leftover of the previous one. */
2301: load4l r5, r6, r7, r8
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100231 orr r4, r4, r5, lspush #16
232 mov r5, r5, lspull #16
233 orr r5, r5, r6, lspush #16
234 mov r6, r6, lspull #16
235 orr r6, r6, r7, lspush #16
236 mov r7, r7, lspull #16
237 orr r7, r7, r8, lspush #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 stmia dst!, {r4, r5, r6, r7}
239 adcs sum, sum, r4
240 adcs sum, sum, r5
241 adcs sum, sum, r6
242 adcs sum, sum, r7
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100243 mov r4, r8, lspull #16
/* sub and teq keep the carry from the last adcs alive across the loop branch. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 sub ip, ip, #16
245 teq ip, #0
246 bne 1b
/* Optional 8-byte piece, then optional 4-byte piece, selected by len masked with 12. */
2472: ands ip, len, #12
248 beq 4f
249 tst ip, #8
250 beq 3f
251 load2l r5, r6
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100252 orr r4, r4, r5, lspush #16
253 mov r5, r5, lspull #16
254 orr r5, r5, r6, lspush #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 stmia dst!, {r4, r5}
256 adcs sum, sum, r4
257 adcs sum, sum, r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100258 mov r4, r6, lspull #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 tst ip, #4
260 beq 4f
2613: load1l r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100262 orr r4, r4, r5, lspush #16
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263 str r4, [dst], #4
264 adcs sum, sum, r4
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100265 mov r4, r5, lspull #16
/*
 * Trailing 1 to 3 bytes. The first two come from the leftover in r4;
 * a third, if needed, is fetched with load1b before joining .Lexit.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000267 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 mov r5, r4, get_byte_0
269 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000270 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 adcs sum, sum, r4
272 strb r5, [dst], #1
273 mov r5, r4, get_byte_1
274 strb r5, [dst], #1
/* Exactly two bytes were left when bit 0 of len is clear: finish without touching src again. */
275 tst len, #1
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000276 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 load1b r5
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000278 b .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279
/*
 * Source offset 3 within its word. Same structure as the other
 * unaligned-source paths, with a 24-bit lspull and an 8-bit lspush.
 * Entered from .Lsrc_not_aligned with the first aligned word in r5.
 */
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100280.Lsrc3_aligned: mov r4, r5, lspull #24
/* The dispatching cmp left C set on this path; adding zero clears it before the adcs chain. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 adds sum, sum, #0
282 bics ip, len, #15
283 beq 2f
/* 16 bytes per pass: merge each new word with the leftover of the previous one. */
2841: load4l r5, r6, r7, r8
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100285 orr r4, r4, r5, lspush #8
286 mov r5, r5, lspull #24
287 orr r5, r5, r6, lspush #8
288 mov r6, r6, lspull #24
289 orr r6, r6, r7, lspush #8
290 mov r7, r7, lspull #24
291 orr r7, r7, r8, lspush #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 stmia dst!, {r4, r5, r6, r7}
293 adcs sum, sum, r4
294 adcs sum, sum, r5
295 adcs sum, sum, r6
296 adcs sum, sum, r7
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100297 mov r4, r8, lspull #24
/* sub and teq keep the carry from the last adcs alive across the loop branch. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 sub ip, ip, #16
299 teq ip, #0
300 bne 1b
/* Optional 8-byte piece, then optional 4-byte piece, selected by len masked with 12. */
3012: ands ip, len, #12
302 beq 4f
303 tst ip, #8
304 beq 3f
305 load2l r5, r6
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100306 orr r4, r4, r5, lspush #8
307 mov r5, r5, lspull #24
308 orr r5, r5, r6, lspush #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 stmia dst!, {r4, r5}
310 adcs sum, sum, r4
311 adcs sum, sum, r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100312 mov r4, r6, lspull #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 tst ip, #4
314 beq 4f
3153: load1l r5
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100316 orr r4, r4, r5, lspush #8
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 str r4, [dst], #4
318 adcs sum, sum, r4
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100319 mov r4, r5, lspull #24
/*
 * Trailing 1 to 3 bytes. The first comes from the leftover in r4; when
 * two or three are needed, another load1l supplies the rest.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003204: ands len, len, #3
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000321 beq .Ldone
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 mov r5, r4, get_byte_0
323 tst len, #2
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000324 beq .Lexit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 strb r5, [dst], #1
326 adcs sum, sum, r4
/* r4 is reloaded here; its first byte is stored and summed, its second is left in r5 for .Lexit. */
327 load1l r4
328 mov r5, r4, get_byte_0
329 strb r5, [dst], #1
Victor Kamenskyd98b90e2014-02-25 08:41:09 +0100330 adcs sum, sum, r4, lspush #24
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 mov r5, r4, get_byte_1
Nicolas Pitre8adbb372005-11-11 21:51:49 +0000332 b .Lexit
/* FN_EXIT is a macro defined outside this file; presumably it closes what FN_ENTRY opened - confirm. */
Catalin Marinas93ed3972008-08-28 11:22:32 +0100333FN_EXIT