/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Build-time selection: this source produces either stpcpy or strcpy.
// At least one of STPCPY / STRCPY has to be defined by whoever builds it;
// every later "#if defined(STPCPY)" picks the stpcpy flavour when STPCPY is
// set and the strcpy flavour otherwise.
#if !(defined(STPCPY) || defined(STRCPY))
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

    // Unified ARM/Thumb syntax; the code below is assembled as Thumb.
    .syntax unified

    .thumb
    .thumb_func

67#if defined(STPCPY)
68 .macro m_push
69 push {r4, r5, lr}
70 .cfi_def_cfa_offset 12
71 .cfi_rel_offset r4, 0
72 .cfi_rel_offset r5, 4
73 .cfi_rel_offset lr, 8
74 .endm // m_push
75#else
76 .macro m_push
77 push {r0, r4, r5, lr}
78 .cfi_def_cfa_offset 16
79 .cfi_rel_offset r0, 0
80 .cfi_rel_offset r4, 4
81 .cfi_rel_offset r5, 8
82 .cfi_rel_offset lr, 12
83 .endm // m_push
84#endif
85
86#if defined(STPCPY)
87 .macro m_pop
88 pop {r4, r5, pc}
89 .endm // m_pop
90#else
91 .macro m_pop
92 pop {r0, r4, r5, pc}
93 .endm // m_pop
94#endif
95
96 .macro m_copy_byte reg, cmd, label
97 ldrb \reg, [r1], #1
98 strb \reg, [r0], #1
99 \cmd \reg, \label
100 .endm // m_copy_byte
101
// String copy, built as stpcpy when STPCPY is defined and as strcpy
// otherwise.
//
// In:   r0 = destination pointer, r1 = source pointer (NUL-terminated).
// Out:  strcpy build: r0 = the destination pointer passed in (saved by
//                     m_push and reloaded by m_pop).
//       stpcpy build: r0 = address of the NUL byte written to the
//                     destination.
// Uses: r2, r3, ip as scratch; r4, r5 and lr are saved by m_push and
//       restored by m_pop (lr is used as a scratch register in between).
//
// Strategy: copy the first 8 bytes one at a time, byte-copy until the
// destination is 8-byte aligned, then move 8 bytes per iteration. When the
// source is not 8-byte aligned as well, one of seven specialised loops is
// used so that no load ever touches bytes beyond the 8-byte block that is
// already known to contain string data.
//
// Zero detection idiom used throughout:
//     ip = (x - 0x01010101) & ~x & 0x80808080
// ip is non-zero iff some byte of x is zero, and the lowest set bit of ip
// (bit 7, 15, 23 or 31) identifies the first zero byte.
#if defined(STPCPY)
ENTRY(stpcpy)
#else
ENTRY(strcpy)
#endif
    // For short copies, hard-code checking the first 8 bytes since this
    // new code doesn't win until after about 8 bytes.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue

.Lstringcopy_finish:
    // The terminator has just been stored with a post-increment, so r0 is
    // one past it; stpcpy must return the address of the terminator itself.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_pop

.Lstringcopy_continue:
    pld     [r1, #0]
    ands    r3, r0, #7                  // r3 = dst & 7
    beq     .Lstringcopy_check_src_align

    // Align to a double word (64 bits).
    rsb     r3, r3, #8                  // r3 = bytes needed to align dst (1..7)
    lsls    ip, r3, #31                 // Z clear iff bit 0 set, C = bit 1
    beq     .Lstringcopy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_32:
    // The carry flag still holds bit 1 of r3 from the lsls above: none of
    // the instructions executed since then modify the flags.
    bcc     .Lstringcopy_align_to_64

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_64:
    tst     r3, #4
    beq     .Lstringcopy_check_src_align
    // Copy these four bytes one at a time. The alignment of src is not known
    // at this point, so a word load could extend up to three bytes past the
    // terminator and into a different (possibly unmapped) page.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7                  // r3 = src & 7, indexes the table below
    bne     .Lstringcopy_unaligned_copy

    .p2align 2
.Lstringcopy_mainloop:
    // Both pointers are 8-byte aligned: load 8 bytes, store them only if
    // neither word contains the terminator.
    ldrd    r2, r3, [r1], #8

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_mainloop

.Lstringcopy_complete:
    // Reached from the byte-wise alignment copies; r0 is one past the
    // terminator that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_pop

.Lstringcopy_zero_in_first_register:
    // r2 holds the word containing the terminator, ip its zero-byte mask.
    // Shifting left by 17 leaves only mask bit 7 in the result (byte 0 is
    // zero iff the result is non-zero) and moves mask bit 15 into the carry
    // (byte 1 is zero).
    lsls    lr, ip, #17
    bne     .Lstringcopy_copy1byte
    bcs     .Lstringcopy_copy2bytes
    // Bits 7 and 15 are clear here, so after shifting out bit 31 the result
    // is non-zero iff bit 23 was set (byte 2 is zero).
    lsls    ip, ip, #1
    bne     .Lstringcopy_copy3bytes

.Lstringcopy_copy4bytes:
    // Copy 4 bytes to the destination.
    // In the stpcpy build the post-index leaves r0 on the terminator.
#if defined(STPCPY)
    str     r2, [r0], #3
#else
    str     r2, [r0]
#endif
    m_pop

.Lstringcopy_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstringcopy_copy2bytes:
#if defined(STPCPY)
    strh    r2, [r0], #1
#else
    strh    r2, [r0]
#endif
    m_pop

.Lstringcopy_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_pop

.Lstringcopy_zero_in_second_register:
    // r2 is terminator-free, r3 holds the word containing the terminator and
    // ip its zero-byte mask; the dispatch mirrors the first-register case.
    lsls    lr, ip, #17
    bne     .Lstringcopy_copy5bytes
    bcs     .Lstringcopy_copy6bytes
    lsls    ip, ip, #1
    bne     .Lstringcopy_copy7bytes

    // Copy 8 bytes to the destination.
    strd    r2, r3, [r0]
#if defined(STPCPY)
    add     r0, r0, #7
#endif
    m_pop

.Lstringcopy_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
    m_pop

.Lstringcopy_copy6bytes:
    str     r2, [r0], #4
#if defined(STPCPY)
    strh    r3, [r0], #1
#else
    strh    r3, [r0]
#endif
    m_pop

.Lstringcopy_copy7bytes:
    str     r2, [r0], #4
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_pop

.Lstringcopy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7) selects the table entry; entry 0 is never used
    // because the aligned case went to the main loop instead.
    tbb     [pc, r3]
.Lstringcopy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstringcopy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Probe the remaining three bytes of this block individually before
    // loading the word that reaches into the next block.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 is still unchecked. All 8 bytes are stored
    // either way; Z from the lsrs (strd leaves the flags alone) tells
    // whether the last one was the terminator.
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

.Lstringcopy_unalign7_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
.Lstringcopy_unalign_return:
    m_pop

.Lstringcopy_unalign7_copy6bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_pop

.Lstringcopy_unalign7_copy7bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstringcopy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were probed above; check byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     .Lstringcopy_copy7bytes
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstringcopy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign5

.Lstringcopy_unalign_copy5bytes:
    str     r2, [r0], #4
    strb    r4, [r0]
    m_pop

.Lstringcopy_unalign_copy6bytes:
    str     r2, [r0], #4
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstringcopy_unalign4:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldr     r3, [r1], #4
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstringcopy_unalign3:
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 of r2 were probed above, so only its top byte can be zero.
    lsrs    lr, r2, #24
    beq     .Lstringcopy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign3

.Lstringcopy_unalign3_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstringcopy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_pop

.Lstringcopy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstringcopy_unalign2:
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were probed above; check byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstringcopy_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstringcopy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstringcopy_unalign1:
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign1

.Lstringcopy_unalign_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstringcopy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r4, [r0]
    m_pop
#if defined(STPCPY)
END(stpcpy)
#else
END(strcpy)
#endif