/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 * memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non dma-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
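
/*
 * Rough pseudo-C sketch of the __copy_user contract above, for illustration
 * only (the names "to", "from" and "left" are invented here; real callers
 * reach this code through the wrappers in uaccess.h, not a plain C call):
 *
 *	left = __copy_user(to, from, len);
 *	if (left == 0)
 *		... all len bytes were copied ...
 *	else
 *		... "left" is an upper bound on the bytes not copied; for the
 *		copy_from_user case the unwritten tail of "to" has been zeroed
 */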

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
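
/*
 * Worked example of the bookkeeping above (illustrative numbers only):
 * if __copy_user is entered with src == 0x100 and len == 0x40, then
 * AT == 0x140, one byte past the end of the source.  Should a load fault
 * once 0x10 bytes have been dealt with, the recorded fault address is
 * about 0x110 and .Ll_exc below computes len = 0x140 - 0x110 = 0x30,
 * an upper bound on the number of bytes left uncopied.
 */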

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */
#define EXC(insn, type, reg, addr, handler)			\
9:	insn reg, addr;						\
	.section __ex_table,"a";				\
	PTR	9b, handler;					\
	.previous
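
/*
 * For illustration, a use such as LOAD(t0, 0(src), .Ll_exc) with the 64-bit
 * definitions below expands to roughly the following (sketch only):
 *
 *	9:	ld	t0, 0(src)
 *		.section __ex_table,"a"
 *		PTR	9b, .Ll_exc
 *		.previous
 *
 * so a fault taken on the ld is steered to .Ll_exc via the exception table.
 */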

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing code with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

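/*
 * Example values for illustration, assuming USE_DOUBLE (NBYTES == 8):
 * FIRST(0) == 0, REST(0) == 7, FIRST(1) == 8, REST(1) == 15 and
 * ADDRMASK == 7, so "and t1, dst, ADDRMASK" below yields dst's byte
 * offset within a doubleword (zero when dst is already aligned).
 */
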
	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

/*
 * t6 is used as a flag to note inatomic mode.
 */
LEAF(__copy_user_inatomic)
	b	__copy_user_common
	li	t6, 1
	END(__copy_user_inatomic)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
	li	t6, 0	/* not inatomic */
__copy_user_common:
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
#define rem t8

	R10KCBARRIER(0(ra))
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	PREF(	0, 0(src) )
	PREF(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREF(	0, 1*32(src) )
	PREF(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen
	and	t0, src, ADDRMASK
	PREF(	0, 2*32(src) )
	PREF(	1, 2*32(dst) )
	bnez	t1, .Ldst_unaligned
	nop
	bnez	t0, .Lsrc_unaligned_dst_aligned
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned:
	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
	and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
	PREF(	0, 3*32(src) )
	PREF(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, UNIT(0)(src), .Ll_exc)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
	SUB	len, len, 8*NBYTES
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
	LOAD(t7, UNIT(5)(src), .Ll_exc_copy)
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p8u)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p7u)
	LOAD(t0, UNIT(6)(src), .Ll_exc_copy)
	LOAD(t1, UNIT(7)(src), .Ll_exc_copy)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u)
	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u)
	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u)
	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u)
	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u)
	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u)
	PREF(	0, 8*32(src) )
	PREF(	1, 8*32(dst) )
	bne	len, rem, 1b
	nop

	/*
	 * len == rem == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned:
	beqz	len, .Ldone
	sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units
	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD( t0, UNIT(0)(src),	.Ll_exc)
	LOAD( t1, UNIT(1)(src),	.Ll_exc_copy)
	LOAD( t2, UNIT(2)(src),	.Ll_exc_copy)
	LOAD( t3, UNIT(3)(src),	.Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone
	.set	noreorder
.Lless_than_4units:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes
	nop
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, 0(src), .Ll_exc)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
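	/*
	 * Numeric example for illustration, assuming NBYTES == 8: with
	 * len == 3 bytes remaining, rem == 24 bits are kept and
	 * bits == 40 bits are discarded by SHIFT_DISCARD, so the partial
	 * STREST ending at dst + len - 1 writes exactly the 3 remaining
	 * destination bytes.
	 */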
#define bits t2
	beqz	len, .Ldone
	ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc)
	SUB	bits, bits, rem	# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc)
	jr	ra
	move	len, zero
.Ldst_unaligned:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
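	/*
	 * Example for illustration (assuming NBYTES == 8): if dst & 7 == 3
	 * then t2 == 5, the unaligned LDFIRST/LDREST pair fetches one
	 * doubleword from src, and STFIRST writes 5 bytes, after which dst
	 * is doubleword aligned.  If src & 7 is also 3, match == 0 and
	 * execution continues at .Lboth_aligned.
	 */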
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
	beq	len, t2, .Ldone
	SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned
	ADD	src, src, t2

.Lsrc_unaligned_dst_aligned:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	PREF(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned
	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
	PREF(	1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
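/*
 * For illustration: each LDFIRST/LDREST pair below is the usual unaligned
 * load idiom (ldl/ldr, or lwl/lwr on 32-bit kernels); e.g.
 * LDFIRST(t0, FIRST(0)(src), ...) plus LDREST(t0, REST(0)(src), ...) reads
 * the NBYTES unaligned bytes at src into t0.  The pairs are interleaved
 * across t0-t3 to respect the issue constraint described above.
 */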
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy)
	LDREST(t1, REST(1)(src), .Ll_exc_copy)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
	LDREST(t2, REST(2)(src), .Ll_exc_copy)
	LDREST(t3, REST(3)(src), .Ll_exc_copy)
	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u)
	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u)
	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u)
	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u)
	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned:
	beqz	len, .Ldone
	and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes
	nop
1:
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
	LDREST(t0, REST(0)(src), .Ll_exc_copy)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen:
	beqz	len, .Ldone
	nop
.Lcopy_bytes:
	/* 0 < len < NBYTES */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N)			\
	LOADB(t0, N(src), .Ll_exc);	\
	SUB	len, len, 1;		\
	beqz	len, .Ldone;		\
	STOREB(t0, N(dst), .Ls_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADB(t0, NBYTES-2(src), .Ll_exc)
	SUB	len, len, 1
	jr	ra
	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1)
.Ldone:
	jr	ra
	nop
	END(memcpy)

.Ll_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
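	/*
	 * Example for illustration: if LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
	 * in the main loop faults, the word at UNIT(0)(src) was already read
	 * successfully, THREAD_BUADDR holds src + NBYTES, and the byte loop
	 * below salvages those NBYTES readable bytes into dst before falling
	 * through to .Ll_exc.
	 */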
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADB(t1, 0(src), .Ll_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc:
	LOADK	t0, TI_TASK($28)
	nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	nop
	SUB	len, AT, t0		# len number of uncopied bytes
	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 *   See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	SUB	src, src, 1
#else
	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	SUB	src, src, v1
	.set	pop
#endif
	jr	ra
	nop


#define SEXC(n)							\
	.set	reorder;			/* DADDI_WAR */	\
.Ls_exc_p ## n ## u:						\
	ADD	len, len, n*NBYTES;				\
	jr	ra;						\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc:
	jr	ra
	nop

	.align	5
LEAF(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, .L__memcpy
	move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	move	a2, zero
	END(__rmemcpy)