/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share a code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD	ld
#define ADD	daddu
#define NBYTES	8

#else

#define LOAD	lw
#define ADD	addu
#define NBYTES	4

#endif /* USE_DOUBLE */

#define UNIT(unit)	((unit)*NBYTES)

#define ADDC(sum,reg)						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1
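
/*
 * ADDC adds a word into the running sum with an end-around carry:
 * after the ADD, sltu captures the unsigned carry-out in v1 and the
 * second ADD folds that carry back into the low bits, which is what
 * ones' complement (Internet checksum) arithmetic requires.  A rough
 * C sketch of the 32-bit case (illustration only, not part of this
 * file):
 *
 *	u32 addc(u32 sum, u32 word)
 *	{
 *		sum += word;
 *		if (sum < word)
 *			sum++;		the carry wrapped around
 *		return sum;
 *	}
 *
 * Note that v1 is clobbered, so callers must not keep live data there.
 */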

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(sum, _t0);						\
	ADDC(sum, _t1);						\
	ADDC(sum, _t2);						\
	ADDC(sum, _t3)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif
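
/*
 * Either way, one CSUM_BIGCHUNK consumes 0x20 bytes: a single pass of
 * four 8-byte loads with USE_DOUBLE, or two passes of four 4-byte
 * loads without it.  That keeps the unrolled loops below (which step
 * in 0x20/0x80 strides) identical for both word sizes.
 */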

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
	move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

hword_align:
	beqz	t7, word_align
	andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

word_align:
	beqz	t8, dword_align
	sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

dword_align:
	bnez	t8, do_end_words
	move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, qword_align
	andi	t8, src, 0x8

	lw	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

begin_movement:
	beqz	t8, 1f
	andi	t2, a1, 0x40

move_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	bnez	t8, move_128bytes
	PTR_ADDU	src, src, 0x80

1:
	beqz	t2, 1f
	andi	t2, a1, 0x20

move_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, do_end_words
	andi	t8, a1, 0x1c

move_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

do_end_words:
	beqz	t8, small_csumcpy
	andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

end_words:
	lw	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	bnez	t8, end_words
	PTR_ADDU	src, src, 0x4

/* unknown src alignment and < 8 bytes to go */
small_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	andi	t0, a1, 2

	/* Still a full word to go */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	andi	t0, a1, 1

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	sll	t1, t1, 16

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

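	/*
	 * The fold below narrows the running sum while preserving its
	 * value modulo 0xffff: with USE_DOUBLE the two 32-bit halves of
	 * the 64-bit sum are added first, then the two 16-bit halves of
	 * the 32-bit sum, re-adding the carry-out each time.  Roughly,
	 * in C (sketch only):
	 *
	 *	sum = (sum >> 16) + (sum & 0xffff) + carry;
	 *
	 * The result can still carry into bit 16; that is fine, because
	 * csum_partial returns a 32-bit partial sum that the caller
	 * folds again.
	 */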
	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

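	/*
	 * If the buffer started on an odd address, every byte was
	 * accumulated one position out of phase; swapping the two bytes
	 * of the folded sum corrects this, since ones' complement
	 * addition commutes with this byte rotation.
	 */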
	/* odd buffer alignment? */
	beqz	t7, 1f
	nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	.set	reorder
	/* Add the passed partial csum. */
	ADDC(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len, sum)
 *	__csum_partial_copy_user(src, dst, len, sum, errp)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define psum a3
#define sum v0
#define odd t8
#define errptr t9

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by __csum_partial_copy_from_user and maintained by
 *	not writing AT in __csum_partial_copy
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores store -EFAULT to errptr and return.
 * These handlers do not need to overwrite any data.
 */

#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous

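/*
 * EXC wraps one memory access in the kernel's exception-table fixup
 * scheme: the access gets a local label (9:) and a (fault address,
 * handler) pair is emitted into the __ex_table section.  If the access
 * faults, the fault handler looks the faulting PC up in that table and
 * resumes execution at the named handler instead of oopsing.
 */
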
#ifdef USE_DOUBLE

#define LOAD	ld
#define LOADL	ldl
#define LOADR	ldr
#define STOREL	sdl
#define STORER	sdr
#define STORE	sd
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES	3

#else

#define LOAD	lw
#define LOADL	lwl
#define LOADR	lwr
#define STOREL	swl
#define STORER	swr
#define STORE	sw
#define ADD	addu
#define SUB	subu
#define SRL	srl
#define SLL	sll
#define SLLV	sllv
#define SRLV	srlv
#define NBYTES	4
#define LOG_NBYTES	2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST	LOADR
#define LDREST	LOADL
#define STFIRST	STORER
#define STREST	STOREL
#define SHIFT_DISCARD	SLLV
#define SHIFT_DISCARD_REVERT	SRLV
#else
#define LDFIRST	LOADL
#define LDREST	LOADR
#define STFIRST	STOREL
#define STREST	STORER
#define SHIFT_DISCARD	SRLV
#define SHIFT_DISCARD_REVERT	SLLV
#endif

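/*
 * LDFIRST/LDREST pair lwl/lwr (or ldl/ldr) so an unaligned word can be
 * loaded in two accesses; which of the two touches the lowest address
 * depends on endianness, hence the swapped mapping above.  FIRST(unit)
 * below addresses the first byte of a unit, REST(unit) its last byte,
 * matching what those instructions expect.
 */
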
#define FIRST(unit)	((unit)*NBYTES)
#define REST(unit)	(FIRST(unit)+NBYTES-1)

#define ADDRMASK	(NBYTES-1)

	.set	noat

LEAF(__csum_partial_copy_user)
	PTR_ADDU	AT, src, len	/* See (1) above. */
#ifdef CONFIG_64BIT
	move	errptr, a4
#else
	lw	errptr, 16(sp)
#endif
FEXPORT(csum_partial_copy_nocheck)
	move	sum, zero
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, copy_bytes_checklen
	and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, dst_unaligned
	nop
	bnez	t0, src_unaligned_dst_aligned
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
both_aligned:
	SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
	beqz	t0, cleanup_both_aligned # len < 8*NBYTES
	nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
1:
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
EXC(	LOAD	t4, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t5, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t6, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t7, UNIT(7)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
	ADDC(sum, t0)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
	ADDC(sum, t1)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
	ADDC(sum, t2)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
	ADDC(sum, t3)
EXC(	STORE	t4, UNIT(4)(dst),	s_exc)
	ADDC(sum, t4)
EXC(	STORE	t5, UNIT(5)(dst),	s_exc)
	ADDC(sum, t5)
EXC(	STORE	t6, UNIT(6)(dst),	s_exc)
	ADDC(sum, t6)
EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
	ADDC(sum, t7)
	bgez	len, 1b
	ADD	dst, dst, 8*NBYTES
	ADD	len, 8*NBYTES		# revert len (see above)
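
	/*
	 * len was biased by -8*NBYTES before the loop so that the bgez
	 * above means "at least one more full 8*NBYTES chunk remains";
	 * the ADD just above restores the true residual length.
	 */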

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
cleanup_both_aligned:
#define rem t7
	beqz	len, done
	sltu	t0, len, 4*NBYTES
	bnez	t0, less_than_4units
	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
	ADDC(sum, t0)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
	ADDC(sum, t1)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
	ADDC(sum, t2)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
	ADDC(sum, t3)
	beqz	len, done
	ADD	dst, dst, 4*NBYTES
less_than_4units:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, copy_bytes
	nop
1:
EXC(	LOAD	t0, 0(src),		l_exc)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc)
	ADDC(sum, t0)
	bne	rem, len, 1b
	ADD	dst, dst, NBYTES

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, done
	ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	bits, bits, rem	# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
EXC(	STREST	t0, -1(t1),		s_exc)
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	done
	.set	noreorder
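	/*
	 * In the sequence above, SHIFT_DISCARD shifts out the bytes past
	 * len so STREST stores exactly the kept bytes, and
	 * SHIFT_DISCARD_REVERT shifts them back (zeros in the discarded
	 * positions) so the checksum sees them at their original byte
	 * offsets.
	 */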
dst_unaligned:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
	ADD	t2, zero, NBYTES
EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
EXC(	STFIRST t3, FIRST(0)(dst),	s_exc)
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, done
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, both_aligned
	ADD	src, src, t2

src_unaligned_dst_aligned:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, cleanup_src_unaligned
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
EXC(	STORE	t0, UNIT(0)(dst),	s_exc)
	ADDC(sum, t0)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc)
	ADDC(sum, t1)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
	ADDC(sum, t2)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
	ADDC(sum, t3)
	bne	len, rem, 1b
	ADD	dst, dst, 4*NBYTES

cleanup_src_unaligned:
	beqz	len, done
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, copy_bytes
	nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc)
	ADDC(sum, t0)
	bne	len, rem, 1b
	ADD	dst, dst, NBYTES

copy_bytes_checklen:
	beqz	len, done
	nop
copy_bytes:
	/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero	# partial word
	li	t3, SHIFT_START	# shift
/* use l_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N)			\
EXC(	lbu	t0, N(src), l_exc_copy);	\
	SUB	len, len, 1;		\
EXC(	sb	t0, N(dst), s_exc);	\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, copy_bytes_done;	\
	or	t2, t0
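
/*
 * COPY_BYTE shifts each byte to the position it would occupy after a
 * word load from src (low byte first on little-endian, high byte first
 * on big-endian) and ORs it into t2, so the tail can be folded into
 * the sum with a single ADDC once all bytes are gathered.
 */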

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
EXC(	lbu	t0, NBYTES-2(src), l_exc_copy)
	SUB	len, len, 1
EXC(	sb	t0, NBYTES-2(dst), s_exc)
	SLLV	t0, t0, t3
	or	t2, t0
copy_bytes_done:
	ADDC(sum, t2)
done:
	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
	sll	v1, sum, 16
	addu	sum, v1
	sltu	v1, sum, v1
	srl	sum, sum, 16
	addu	sum, v1

	/* odd buffer alignment? */
	beqz	odd, 1f
	nop
	sll	v1, sum, 8
	srl	sum, sum, 8
	or	sum, v1
	andi	sum, 0xffff
1:
	.set	reorder
	ADDC(sum, psum)
	jr	ra
	.set	noreorder

l_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOAD	t0, TI_TASK($28)
	li	t2, SHIFT_START
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lbu	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	SLLV	t1, t1, t2
	addu	t2, SHIFT_INC
	ADDC(sum, t1)
	bne	src, t0, 1b
	ADD	dst, dst, 1
l_exc:
	LOAD	t0, TI_TASK($28)
	nop
	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	nop
	SUB	len, AT, t0		# len number of uncopied bytes
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 *   See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
	beqz	len, done
	SUB	src, len, 1
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
	bnez	src, 1b
	SUB	src, src, 1
	li	v1, -EFAULT
	b	done
	sw	v1, (errptr)

s_exc:
	li	v0, -1	/* invalid checksum */
	li	v1, -EFAULT
	jr	ra
	sw	v1, (errptr)
	END(__csum_partial_copy_user)