blob: 9aa4f883ded960e3aa8b4c412202c3498312adef [file] [log] [blame]
Christopher Ferris4e24dcc2013-07-15 12:49:26 -07001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * Copyright (c) 2013 ARM Ltd
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. The name of the company may not be used to endorse or promote
41 * products derived from this software without specific prior written
42 * permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
45 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
49 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
50 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
51 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
52 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 */
55
56#include <machine/asm.h>
57
58 .syntax unified
59
60 .thumb
61 .thumb_func
62
// m_push: build the stack frame used by this file.
// Saves r0 together with r4, r5 and lr. m_ret pops the same four slots,
// which restores r0/r4/r5 and loads the saved lr into pc.
.macro m_push
    push    {r0, r4, r5, lr}
.endm   // m_push
66
// m_ret: undo m_push and return to the caller.
//   inst - the pop mnemonic to emit. Plain "pop" returns unconditionally;
//          a conditional form such as popeq/popne/popcs is used as the
//          last instruction of an IT block.
// The four slots written by m_push are popped into r0, r4, r5 and pc, so
// the value that was in lr at m_push time becomes the new pc.
.macro m_ret inst
    \inst   {r0, r4, r5, pc}
.endm   // m_ret
70
// m_copy_byte: copy one byte from [r1] to [r0], post-incrementing both
// pointers, then test the copied byte.
//   reg   - scratch register that receives the byte
//   cmd   - compare-and-branch mnemonic applied to reg (cbz or cbnz)
//   label - branch target handed to cmd
.macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
.endm   // m_copy_byte
76
// char* strcpy(char* dst, const char* src)
//
// In:    r0 = dst, r1 = src
// Out:   r0 = original dst (saved by m_push, restored by every m_ret)
// Uses:  r2, r3 and ip as scratch. r4, r5 and lr are also used as scratch
//        but are saved by m_push and restored by m_ret.
//
// Strategy:
//   1. Copy the first 8 bytes one at a time.
//   2. Copy 1..7 more bytes one at a time until dst is 8 byte aligned.
//   3. If src is now 8 byte aligned as well, copy 8 bytes per iteration in
//      strcpy_mainloop. Otherwise dispatch on (src & 7) to one of seven
//      loops that never read src beyond what is known to be readable.
//
// Zero byte detection: for a 32 bit word x,
//      (x - 0x01010101) & ~x & 0x80808080
// is non-zero exactly when x contains a zero byte, and the lowest 0x80 bit
// that is set marks the first zero byte. Words are interpreted with byte 0
// in the least significant bits (the code assumes little-endian data).
ENTRY(strcpy)
    // Unroll the first 8 bytes that will be copied.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue

strcpy_finish:
    m_ret   inst=pop

strcpy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; non-zero means dst still has to be aligned.
    ands    r3, r0, #7
    bne     strcpy_align_dst

strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     strcpy_unaligned_copy

    .p2align 2
strcpy_mainloop:
    // Both pointers are 8 byte aligned: move 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    // Zero byte check on the first word.
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Zero byte check on the second word.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_mainloop

strcpy_zero_in_first_register:
    // r2 holds the final word, ip holds its zero byte mask.
    // The shift by 17 moves mask bit 15 into C and leaves a non-zero result
    // only if mask bit 7 is set:
    //   NE -> terminator is byte 0, CS -> terminator is byte 1.
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Mask bits 7 and 15 are clear here. After shifting left by one the
    // result is zero only if mask bit 23 is clear as well, which means the
    // terminator is byte 3 and the whole word can be stored.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    // Terminator is byte 2: store a half word followed by the zero byte.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_zero_in_second_register:
    // r2 is a full word of string data, r3 holds the final word and ip the
    // zero byte mask of r3. Same flag decoding as in
    // strcpy_zero_in_first_register, with r2 stored first in every case.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    // Terminator is byte 2 of r3. r4 is free to use as scratch because
    // m_ret reloads it from the stack.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7) = number of bytes (1..7) needed to align dst.
    rsb     r3, r3, #8
    // Bit 0 of r3 ends up in bit 31 of ip (Z is clear if it was set) and
    // bit 1 of r3 ends up in C.
    lsls    ip, r3, #31
    beq     strcpy_align_to_32

    // Bit 0 set: copy one byte.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete

strcpy_align_to_32:
    // C still holds bit 1 of r3; the ldrb/strb/cbz above leave the flags
    // untouched.
    bcc     strcpy_align_to_64

    // Bit 1 set: copy two bytes.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

strcpy_align_to_64:
    tst     r3, #4
    beq     strcpy_check_src_align
    // Bit 2 set: copy four bytes, one byte at a time. The alignment of src
    // is unknown at this point, so a 32 bit load could read past the
    // terminator into a different, possibly unmapped, page.
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    m_copy_byte reg=r2, cmd=cbz, label=strcpy_complete
    b       strcpy_check_src_align

strcpy_complete:
    m_ret   inst=pop

strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7 here). The table directly follows the tbb, so
    // entry r3 selects the loop that may read 8 - r3 bytes. Entry 0 is a
    // placeholder: src & 7 == 0 never reaches this point.
    tbb     [pc, r3]
strcpy_unaligned_branchtable:
    .byte 0
    .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
    .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
strcpy_unalign7:
    // Bytes 0..3 as one word.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Bytes 4..6 are probed individually before the second word load.
    ldrb    r3, [r1]
    cbz     r3, strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, strcpy_unalign7_copy7bytes

    // Bytes 4..6 are non-zero, so only byte 7 can be the terminator.
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Z is set if byte 7 is zero; stmia does not change the flags.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign7

strcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
strcpy_unalign_return:
    m_ret   inst=pop

strcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

strcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
strcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Bytes 4 and 5 are probed individually before the second word load.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 4 and 5 are non-zero; test byte 6, then byte 7.
    tst     r3, #0xff0000
    beq     strcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     strcpy_unalign_return
    b       strcpy_unalign6

strcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
strcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    // Byte 4 is probed individually before the second word load.
    ldrb    r4, [r1]
    cbz     r4, strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign5

strcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

strcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
strcpy_unalign4:
    // Each word is checked for a terminator before the next one is loaded.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
strcpy_unalign3:
    // Bytes 0..2 are probed individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 are non-zero, so only byte 3 of r2 can be the terminator.
    // lr is free to use as scratch because m_ret returns via the stack.
    lsrs    lr, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign3

strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
strcpy_unalign2:
    // Bytes 0 and 1 are probed individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 are non-zero; test byte 2, then byte 3 of r2.
    tst     r2, #0xff0000
    beq     strcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
strcpy_unalign1:
    // Byte 0 is probed individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       strcpy_unalign1

strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

strcpy_unalign_copy3bytes:
    // r2 holds a word whose byte 2 is the terminator.
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

strcpy_unalign_copy4bytes:
    // r2 holds a word whose byte 3 is the terminator.
    stmia   r0, {r2}
    m_ret   inst=pop
END(strcpy)