blob: a8ab1d4fe0ae1530026d1cfe6542071c7387cf81 [file] [log] [blame]
Chris Zankel249ac172005-06-23 22:01:20 -07001/*
2 * arch/xtensa/lib/usercopy.S
3 *
4 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
5 *
6 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
7 * It needs to remain separate and distinct. The hal files are part
Matt LaPlante4b3f6862006-10-03 22:21:02 +02008 * of the Xtensa link-time HAL, and those files may differ per
Chris Zankel249ac172005-06-23 22:01:20 -07009 * processor configuration. Patching the kernel for another
10 * processor configuration includes replacing the hal files, and we
Matt LaPlante4b3f6862006-10-03 22:21:02 +020011 * could lose the special functionality for accessing user-space
Chris Zankel249ac172005-06-23 22:01:20 -070012 * memory during such a patch. We sacrifice a little code space here
13 * in favor to simplify code maintenance.
14 *
15 * This file is subject to the terms and conditions of the GNU General
16 * Public License. See the file "COPYING" in the main directory of
17 * this archive for more details.
18 *
19 * Copyright (C) 2002 Tensilica Inc.
20 */
21
22
23/*
24 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
25 *
26 * The returned value is the number of bytes not copied. Implies zero
27 * is success.
28 *
29 * The general case algorithm is as follows:
30 * If the destination and source are both aligned,
31 * do 16B chunks with a loop, and then finish up with
32 * 8B, 4B, 2B, and 1B copies conditional on the length.
33 * If destination is aligned and source unaligned,
34 * do the same, but use SRC to align the source data.
35 * If destination is unaligned, align it by conditionally
36 * copying 1B and 2B and then retest.
37 * This code tries to use fall-through braches for the common
38 * case of aligned destinations (except for the branches to
39 * the alignment label).
40 *
41 * Register use:
42 * a0/ return address
43 * a1/ stack pointer
44 * a2/ return value
45 * a3/ src
46 * a4/ length
47 * a5/ dst
48 * a6/ tmp
49 * a7/ tmp
50 * a8/ tmp
51 * a9/ tmp
52 * a10/ tmp
53 * a11/ original length
54 */
55
Chris Zankel173d6682006-12-10 02:18:48 -080056#include <asm/variant/core.h>
Chris Zankel249ac172005-06-23 22:01:20 -070057
58#ifdef __XTENSA_EB__
59#define ALIGN(R, W0, W1) src R, W0, W1
60#define SSA8(R) ssa8b R
61#else
62#define ALIGN(R, W0, W1) src R, W1, W0
63#define SSA8(R) ssa8l R
64#endif
65
66/* Load or store instructions that may cause exceptions use the EX macro. */
67
68#define EX(insn,reg1,reg2,offset,handler) \
699: insn reg1, reg2, offset; \
70 .section __ex_table, "a"; \
71 .word 9b, handler; \
72 .previous
73
74
75 .text
76 .align 4
77 .global __xtensa_copy_user
78 .type __xtensa_copy_user,@function
79__xtensa_copy_user:
80 entry sp, 16 # minimal stack frame
81 # a2/ dst, a3/ src, a4/ len
82 mov a5, a2 # copy dst so that a2 is return value
83 mov a11, a4 # preserve original len for error case
84.Lcommon:
85 bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
86 bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
87.Ldstaligned: # return here from .Ldstunaligned when dst is aligned
88 srli a7, a4, 4 # number of loop iterations with 16B
89 # per iteration
90 movi a8, 3 # if source is also aligned,
91 bnone a3, a8, .Laligned # then use word copy
92 SSA8( a3) # set shift amount from byte offset
93 bnez a4, .Lsrcunaligned
94 movi a2, 0 # return success for len==0
95 retw
96
97/*
98 * Destination is unaligned
99 */
100
101.Ldst1mod2: # dst is only byte aligned
102 bltui a4, 7, .Lbytecopy # do short copies byte by byte
103
104 # copy 1 byte
105 EX(l8ui, a6, a3, 0, l_fixup)
106 addi a3, a3, 1
107 EX(s8i, a6, a5, 0, s_fixup)
108 addi a5, a5, 1
109 addi a4, a4, -1
110 bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
111 # return to main algorithm
112.Ldst2mod4: # dst 16-bit aligned
113 # copy 2 bytes
114 bltui a4, 6, .Lbytecopy # do short copies byte by byte
115 EX(l8ui, a6, a3, 0, l_fixup)
116 EX(l8ui, a7, a3, 1, l_fixup)
117 addi a3, a3, 2
118 EX(s8i, a6, a5, 0, s_fixup)
119 EX(s8i, a7, a5, 1, s_fixup)
120 addi a5, a5, 2
121 addi a4, a4, -2
122 j .Ldstaligned # dst is now aligned, return to main algorithm
123
124/*
125 * Byte by byte copy
126 */
127 .align 4
128 .byte 0 # 1 mod 4 alignment for LOOPNEZ
129 # (0 mod 4 alignment for LBEG)
130.Lbytecopy:
131#if XCHAL_HAVE_LOOPS
132 loopnez a4, .Lbytecopydone
133#else /* !XCHAL_HAVE_LOOPS */
134 beqz a4, .Lbytecopydone
135 add a7, a3, a4 # a7 = end address for source
136#endif /* !XCHAL_HAVE_LOOPS */
137.Lnextbyte:
138 EX(l8ui, a6, a3, 0, l_fixup)
139 addi a3, a3, 1
140 EX(s8i, a6, a5, 0, s_fixup)
141 addi a5, a5, 1
142#if !XCHAL_HAVE_LOOPS
143 blt a3, a7, .Lnextbyte
144#endif /* !XCHAL_HAVE_LOOPS */
145.Lbytecopydone:
146 movi a2, 0 # return success for len bytes copied
147 retw
148
149/*
150 * Destination and source are word-aligned.
151 */
152 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
153 .align 4 # 1 mod 4 alignment for LOOPNEZ
154 .byte 0 # (0 mod 4 alignment for LBEG)
155.Laligned:
156#if XCHAL_HAVE_LOOPS
157 loopnez a7, .Loop1done
158#else /* !XCHAL_HAVE_LOOPS */
159 beqz a7, .Loop1done
160 slli a8, a7, 4
161 add a8, a8, a3 # a8 = end of last 16B source chunk
162#endif /* !XCHAL_HAVE_LOOPS */
163.Loop1:
164 EX(l32i, a6, a3, 0, l_fixup)
165 EX(l32i, a7, a3, 4, l_fixup)
166 EX(s32i, a6, a5, 0, s_fixup)
167 EX(l32i, a6, a3, 8, l_fixup)
168 EX(s32i, a7, a5, 4, s_fixup)
169 EX(l32i, a7, a3, 12, l_fixup)
170 EX(s32i, a6, a5, 8, s_fixup)
171 addi a3, a3, 16
172 EX(s32i, a7, a5, 12, s_fixup)
173 addi a5, a5, 16
174#if !XCHAL_HAVE_LOOPS
175 blt a3, a8, .Loop1
176#endif /* !XCHAL_HAVE_LOOPS */
177.Loop1done:
178 bbci.l a4, 3, .L2
179 # copy 8 bytes
180 EX(l32i, a6, a3, 0, l_fixup)
181 EX(l32i, a7, a3, 4, l_fixup)
182 addi a3, a3, 8
183 EX(s32i, a6, a5, 0, s_fixup)
184 EX(s32i, a7, a5, 4, s_fixup)
185 addi a5, a5, 8
186.L2:
187 bbci.l a4, 2, .L3
188 # copy 4 bytes
189 EX(l32i, a6, a3, 0, l_fixup)
190 addi a3, a3, 4
191 EX(s32i, a6, a5, 0, s_fixup)
192 addi a5, a5, 4
193.L3:
194 bbci.l a4, 1, .L4
195 # copy 2 bytes
196 EX(l16ui, a6, a3, 0, l_fixup)
197 addi a3, a3, 2
198 EX(s16i, a6, a5, 0, s_fixup)
199 addi a5, a5, 2
200.L4:
201 bbci.l a4, 0, .L5
202 # copy 1 byte
203 EX(l8ui, a6, a3, 0, l_fixup)
204 EX(s8i, a6, a5, 0, s_fixup)
205.L5:
206 movi a2, 0 # return success for len bytes copied
207 retw
208
209/*
210 * Destination is aligned, Source is unaligned
211 */
212
213 .align 4
214 .byte 0 # 1 mod 4 alignement for LOOPNEZ
215 # (0 mod 4 alignment for LBEG)
216.Lsrcunaligned:
217 # copy 16 bytes per iteration for word-aligned dst and unaligned src
218 and a10, a3, a8 # save unalignment offset for below
219 sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
220 EX(l32i, a6, a3, 0, l_fixup) # load first word
221#if XCHAL_HAVE_LOOPS
222 loopnez a7, .Loop2done
223#else /* !XCHAL_HAVE_LOOPS */
224 beqz a7, .Loop2done
225 slli a10, a7, 4
226 add a10, a10, a3 # a10 = end of last 16B source chunk
227#endif /* !XCHAL_HAVE_LOOPS */
228.Loop2:
229 EX(l32i, a7, a3, 4, l_fixup)
230 EX(l32i, a8, a3, 8, l_fixup)
231 ALIGN( a6, a6, a7)
232 EX(s32i, a6, a5, 0, s_fixup)
233 EX(l32i, a9, a3, 12, l_fixup)
234 ALIGN( a7, a7, a8)
235 EX(s32i, a7, a5, 4, s_fixup)
236 EX(l32i, a6, a3, 16, l_fixup)
237 ALIGN( a8, a8, a9)
238 EX(s32i, a8, a5, 8, s_fixup)
239 addi a3, a3, 16
240 ALIGN( a9, a9, a6)
241 EX(s32i, a9, a5, 12, s_fixup)
242 addi a5, a5, 16
243#if !XCHAL_HAVE_LOOPS
244 blt a3, a10, .Loop2
245#endif /* !XCHAL_HAVE_LOOPS */
246.Loop2done:
247 bbci.l a4, 3, .L12
248 # copy 8 bytes
249 EX(l32i, a7, a3, 4, l_fixup)
250 EX(l32i, a8, a3, 8, l_fixup)
251 ALIGN( a6, a6, a7)
252 EX(s32i, a6, a5, 0, s_fixup)
253 addi a3, a3, 8
254 ALIGN( a7, a7, a8)
255 EX(s32i, a7, a5, 4, s_fixup)
256 addi a5, a5, 8
257 mov a6, a8
258.L12:
259 bbci.l a4, 2, .L13
260 # copy 4 bytes
261 EX(l32i, a7, a3, 4, l_fixup)
262 addi a3, a3, 4
263 ALIGN( a6, a6, a7)
264 EX(s32i, a6, a5, 0, s_fixup)
265 addi a5, a5, 4
266 mov a6, a7
267.L13:
268 add a3, a3, a10 # readjust a3 with correct misalignment
269 bbci.l a4, 1, .L14
270 # copy 2 bytes
271 EX(l8ui, a6, a3, 0, l_fixup)
272 EX(l8ui, a7, a3, 1, l_fixup)
273 addi a3, a3, 2
274 EX(s8i, a6, a5, 0, s_fixup)
275 EX(s8i, a7, a5, 1, s_fixup)
276 addi a5, a5, 2
277.L14:
278 bbci.l a4, 0, .L15
279 # copy 1 byte
280 EX(l8ui, a6, a3, 0, l_fixup)
281 EX(s8i, a6, a5, 0, s_fixup)
282.L15:
283 movi a2, 0 # return success for len bytes copied
284 retw
285
286
287 .section .fixup, "ax"
288 .align 4
289
290/* a2 = original dst; a5 = current dst; a11= original len
291 * bytes_copied = a5 - a2
292 * retval = bytes_not_copied = original len - bytes_copied
293 * retval = a11 - (a5 - a2)
294 *
295 * Clearing the remaining pieces of kernel memory plugs security
296 * holes. This functionality is the equivalent of the *_zeroing
297 * functions that some architectures provide.
298 */
299
300.Lmemset:
301 .word memset
302
303s_fixup:
304 sub a2, a5, a2 /* a2 <-- bytes copied */
305 sub a2, a11, a2 /* a2 <-- bytes not copied */
306 retw
307
308l_fixup:
309 sub a2, a5, a2 /* a2 <-- bytes copied */
310 sub a2, a11, a2 /* a2 <-- bytes not copied == return value */
311
312 /* void *memset(void *s, int c, size_t n); */
313 mov a6, a5 /* s */
314 movi a7, 0 /* c */
315 mov a8, a2 /* n */
316 l32r a4, .Lmemset
317 callx4 a4
318 /* Ignore memset return value in a6. */
319 /* a2 still contains bytes not copied. */
320 retw
321