/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if !(defined(STPCPY) || defined(STRCPY))
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

    // This file is a template: it emits stpcpy when STPCPY is defined and
    // strcpy otherwise. Unified assembler syntax, Thumb instruction set.
    .syntax unified
    .thumb
    .thumb_func

// m_push: function prologue.
//   stpcpy build: saves r4, r5 and lr (12 bytes of stack).
//   strcpy build: additionally saves r0, the original dst, in the lowest
//                 slot so that m_ret can pop it back as the return value
//                 (16 bytes of stack).
// The CFI directives record the new CFA offset and the slot of each register.
    .macro m_push
#if defined(STPCPY)
    push    {r4, r5, lr}
    .cfi_def_cfa_offset 12
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset lr, 8
#else
    push    {r0, r4, r5, lr}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r4, 4
    .cfi_rel_offset r5, 8
    .cfi_rel_offset lr, 12
#endif
    .endm // m_push

// m_ret inst: function epilogue matching m_push.
//   inst is the pop mnemonic to use (pop, popeq, popne, popcs, ...), which
//   lets callers place the return inside an IT block. The saved lr is popped
//   straight into pc; the strcpy build also reloads r0 with the saved dst.
    .macro m_ret inst
#if defined(STPCPY)
    \inst   {r4, r5, pc}
#else
    \inst   {r0, r4, r5, pc}
#endif
    .endm // m_ret

// m_copy_byte reg, cmd, label
//   Loads one byte from [r1] into reg, stores it to [r0], post-incrementing
//   both pointers by one, then emits "cmd reg, label". The callers in this
//   file pass cbz or cbnz as cmd to branch on whether the byte just copied
//   was the terminator.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

#if defined(STPCPY)
ENTRY(stpcpy)
#else
ENTRY(strcpy)
#endif
    // In:  r0 = dst, r1 = src.
    // Out: strcpy returns the original dst (saved by m_push, restored by
    //      m_ret); stpcpy returns the address of the terminator it wrote.
    //
    // The first 8 bytes are copied one at a time, fully unrolled. Each of
    // the first seven copies leaves through .Lstringcopy_finish when it hits
    // the terminator; the eighth continues to the bulk copy when the byte is
    // non-zero and otherwise falls through into .Lstringcopy_finish.
    m_push
    m_copy_byte reg=r2, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r2, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz,  label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue

.Lstringcopy_finish:
    // r0 was post-incremented past the terminator that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1              // step back onto the terminator
#endif
    m_ret   inst=pop

.Lstringcopy_continue:
    // Reached after 8 non-zero bytes have been copied.
    pld     [r1, #0]
    ands    r3, r0, #7              // r3 = dst & 7
    bne     .Lstringcopy_align_dst  // dst is not yet 8-byte aligned

.Lstringcopy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7              // r3 = src & 7
    bne     .Lstringcopy_unaligned_copy

    .p2align 2
.Lstringcopy_mainloop:
    // Both pointers are double-word aligned: move 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    // ip = (w - 0x01010101) & ~w & 0x80808080 is non-zero exactly when the
    // word w contains a zero byte.
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    // No terminator in either word: store both and go round again.
    stmia   r0!, {r2, r3}
    b       .Lstringcopy_mainloop

.Lstringcopy_zero_in_first_register:
    // In: r2 = word that contains the terminator, r0 = where it belongs,
    //     ip = zero-byte flags for r2 (0x80 set in each flagged byte lane).
    // The lanes are tested from the low byte upward, and only as many bytes
    // as needed to include the terminator are stored. In the stpcpy build
    // r0 is left on the terminator.
    lsls    lr, ip, #17             // NE: byte 0 flagged, CS: byte 1 flagged
    itt     ne
    strbne  r2, [r0]                // terminator is byte 0
    m_ret   inst=popne
    itt     cs
#if defined(STPCPY)
    strhcs  r2, [r0], #1            // terminator is byte 1
#else
    strhcs  r2, [r0]
#endif
    m_ret   inst=popcs
    lsls    ip, ip, #1              // EQ: byte 2 not flagged
    itt     eq
#if defined(STPCPY)
    streq   r2, [r0], #3            // terminator is byte 3
#else
    streq   r2, [r0]
#endif
    m_ret   inst=popeq
    // Terminator is byte 2: store the low halfword, then the zero byte.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_zero_in_second_register:
    // In: r2 = first word (no terminator), r3 = second word (contains the
    //     terminator), r0 = where the pair belongs, ip = zero-byte flags
    //     for r3. Same lane-by-lane scheme as the first-register case, with
    //     r2 always stored in full ahead of the partial store of r3.
    lsls    lr, ip, #17             // NE: byte 0 flagged, CS: byte 1 flagged
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]                // terminator is byte 0 of r3
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
#if defined(STPCPY)
    strhcs  r3, [r0], #1            // terminator is byte 1 of r3
#else
    strhcs  r3, [r0]
#endif
    m_ret   inst=popcs
    lsls    ip, ip, #1              // EQ: byte 2 not flagged
#if defined(STPCPY)
    ittt    eq
#else
    itt     eq
#endif
    stmiaeq r0, {r2, r3}            // terminator is byte 3 of r3
#if defined(STPCPY)
    addeq   r0, r0, #7              // dst + 7 is the terminator
#endif
    m_ret   inst=popeq
    // Terminator is byte 2 of r3: word, halfword, then the zero byte.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_align_dst:
    // Align dst to a double word (64 bits) by copying 1 to 7 bytes.
    // In: r3 = dst & 7 (non-zero). After the rsb, r3 = bytes left to copy,
    // handled as an optional 1-byte, 2-byte and 4-byte step.
    //
    // Every step copies a byte at a time. src has an unknown alignment
    // here, so any wider load could extend past the terminator and across
    // a page boundary into memory that is not mapped.
    rsb     r3, r3, #8
    lsls    ip, r3, #31             // Z = !(r3 & 1), C = (r3 & 2)
    beq     .Lstringcopy_align_to_32

    // 1-byte step. cbz does not alter the flags, so C is still valid for
    // the bcc below.
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete

.Lstringcopy_align_to_32:
    bcc     .Lstringcopy_align_to_64

    // 2-byte step, returning straight away if either byte is the terminator.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1              // step back onto the terminator
#else
    it      eq
#endif
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1              // step back onto the terminator
#else
    it      eq
#endif
    m_ret   inst=popeq

.Lstringcopy_align_to_64:
    tst     r3, #4
    beq     .Lstringcopy_check_src_align
    // 4-byte step. This used to be a single 4-byte ldr from src, which
    // could read up to three bytes beyond the terminator; when those bytes
    // lie on the next page the load can fault. Read one byte at a time and
    // stop at the terminator instead.
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    b       .Lstringcopy_check_src_align

.Lstringcopy_complete:
    // r0 was post-incremented past the terminator that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1              // step back onto the terminator
#endif
    m_ret   inst=pop

.Lstringcopy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // In: r3 = src & 7, which is 1..7 here. Entry k of the byte table
    // selects the variant that may read 8 - k bytes; entry 0 is only a
    // placeholder because r3 is never zero on this path. The table must
    // directly follow the tbb instruction.
    tbb     [pc, r3]
.Lstringcopy_unaligned_branchtable:
    .byte   0
    .byte   ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)

// Unaligned variant for src & 7 == 1.
    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstringcopy_unalign7:
    // Bytes 0-3: one word load, then the usual zero-byte test.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4-6 are probed individually before the second word is loaded.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    // Bytes 4-6 are non-zero, so only the top byte of r3 can be the
    // terminator.
    ldr     r3, [r1], #4
    pld     [r1, #64]

    lsrs    ip, r3, #24             // EQ: byte 7 is the terminator
    stmia   r0!, {r2, r3}           // does not alter the flags
#if defined(STPCPY)
    beq     .Lstringcopy_finish     // steps r0 back onto the terminator
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

.Lstringcopy_unalign7_copy5bytes:
    // r2 plus the zero byte in r3.
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstringcopy_unalign_return:
    m_ret   inst=pop

.Lstringcopy_unalign7_copy6bytes:
    // r2, r3, then the zero byte in r4.
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign7_copy7bytes:
    // r2, r3, r4, then the zero byte in r5.
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

// Unaligned variant for src & 7 == 2.
    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstringcopy_unalign6:
    // Bytes 0-3: one word load, then the usual zero-byte test.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4-5 are probed individually before the second word is loaded.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    // Bytes 4-5 are non-zero, so only bytes 6 and 7 of the pair remain to
    // be checked.
    ldr     r3, [r1], #4
    pld     [r1, #64]

    tst     r3, #0xff0000           // EQ: byte 6 is the terminator
    beq     .Lstringcopy_unalign6_copy7bytes
    lsrs    ip, r3, #24             // EQ: byte 7 is the terminator
    stmia   r0!, {r2, r3}           // does not alter the flags
#if defined(STPCPY)
    beq     .Lstringcopy_finish     // steps r0 back onto the terminator
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

.Lstringcopy_unalign6_copy7bytes:
    // r2, the low halfword of r3, then the zero byte from bits 16-23.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

// Unaligned variant for src & 7 == 3.
    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstringcopy_unalign5:
    // Bytes 0-3: one word load, then the usual zero-byte test.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Byte 4 is probed on its own before the second word is loaded.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign5

.Lstringcopy_unalign_copy5bytes:
    // r2 plus the zero byte in r4.
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy6bytes:
    // r2, r4, then the zero byte in r5.
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

// Unaligned variant for src & 7 == 4.
    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstringcopy_unalign4:
    // Each word is loaded and tested on its own; the second word is only
    // read once the first is known to hold no terminator.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign4

// Unaligned variant for src & 7 == 5.
    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstringcopy_unalign3:
    // Bytes 0-2 are probed individually before any word is loaded.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0-2 are non-zero, so only the top byte of r2 needs checking.
    lsrs    lr, r2, #24             // EQ: byte 3 is the terminator
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign3

.Lstringcopy_unalign3_copy1byte:
    // r2 is the zero byte.
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy2bytes:
    // r2, then the zero byte in r3.
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy3bytes:
    // r2, r3, then the zero byte in r4.
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

// Unaligned variant for src & 7 == 6.
    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstringcopy_unalign2:
    // Bytes 0-1 are probed individually before any word is loaded.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0-1 are non-zero, so only bytes 2 and 3 of r2 need checking.
    tst     r2, #0xff0000           // EQ: byte 2 is the terminator
    beq     .Lstringcopy_unalign_copy3bytes
    lsrs    ip, r2, #24             // EQ: byte 3 is the terminator
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign2

// Unaligned variant for src & 7 == 7, followed by the short-copy tails that
// the unalign1 and unalign2 loops branch to (the 4-byte tail is also used
// by unalign3).
    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstringcopy_unalign1:
    // Byte 0 is probed on its own before any word is loaded.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign1

.Lstringcopy_unalign_copy1byte:
    // r2 is the zero byte.
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy2bytes:
    // r2, then the zero byte in r3.
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy3bytes:
    // Low halfword of r2, then the zero byte from bits 16-23.
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy4bytes:
    // The whole of r2; its top byte is the terminator.
    stmia   r0, {r2}
#if defined(STPCPY)
    add     r0, r0, #3              // dst + 3 is the terminator
#endif
    m_ret   inst=pop
531#if defined(STPCPY)
532END(stpcpy)
533#else
534END(strcpy)
535#endif