blob: cb878c44fa3bfa88d87511d3fdd67a639e922ef1 [file] [log] [blame]
Christopher Ferris4e24dcc2013-07-15 12:49:26 -07001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * Copyright (c) 2013 ARM Ltd
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. The name of the company may not be used to endorse or promote
41 * products derived from this software without specific prior written
42 * permission.
43 *
44 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
45 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
49 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
50 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
51 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
52 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 */
55
Elliott Hughes851e68a2014-02-19 16:53:20 -080056#include <private/bionic_asm.h>
Christopher Ferris4e24dcc2013-07-15 12:49:26 -070057
58 .syntax unified // use the unified ARM/Thumb assembler syntax for the whole file
59
60 .thumb // assemble everything that follows as Thumb instructions
61 .thumb_func // mark the next symbol that gets defined as a Thumb function
62
// m_push: prologue helper. Pushes r0, r4, r5 and lr so that m_pop can
// later restore r0, r4 and r5 and return by loading the saved lr into pc.
63 .macro m_push
64 push {r0, r4, r5, lr}
65 .endm // m_push
66
// m_pop: epilogue helper, the inverse of m_push. Pops r0, r4 and r5 and
// loads pc from the fourth stacked word; with the frame built by m_push
// that word is the saved lr, so this also returns to the caller.
67 .macro m_pop
68 pop {r0, r4, r5, pc}
69 .endm // m_pop
70
// m_copy_byte: copy one byte from [r1] to [r0], post-incrementing both
// pointers by one, then emit "cmd reg, label" on the byte just copied.
//   reg   - register that receives the byte
//   cmd   - compare-and-branch mnemonic (the uses in this file pass cbz or cbnz)
//   label - branch target handed to cmd
70 .macro m_copy_byte reg, cmd, label
71 ldrb \reg, [r1], #1
72 strb \reg, [r0], #1
73 \cmd \reg, \label
74 .endm // m_copy_byte
76
77ENTRY(strcpy)
    // char* strcpy(char* dst, const char* src)
    // In:  r0 = dst, r1 = src.
    // Out: r0 = the dst value passed in (pushed by m_push, restored by m_pop).
    // r0 and r1 are advanced as bytes are copied; r2, r3, ip and lr are
    // used as scratch, as are r4 and r5, which m_push/m_pop save and restore.
78 // For short copies, hard-code checking the first 8 bytes since this
79 // new code doesn't win until after about 8 bytes.
80 m_push
    // Bytes 1-7: copy a byte and leave through strcpy_finish as soon as the
    // byte just stored is the terminating zero.
81 m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
82 m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
83 m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
84 m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
85 m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
86 m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
87 m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
    // Byte 8: the test is inverted. A non-zero byte continues with the long
    // copy path; a zero byte falls through into strcpy_finish.
88 m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
89
90strcpy_finish:
91 m_pop
92
93strcpy_continue:
    // Long-copy path, entered after eight non-zero bytes have been copied.
    // Advance dst (r0) towards an 8-byte boundary: the 1-byte and 2-byte
    // steps are here, the 4-byte step follows at strcpy_align_to_64.
94 pld [r1, #0] // prefetch hint for the source
95 ands r3, r0, #7 // r3 = dst & 7; zero means dst is already 8-byte aligned
96 beq strcpy_check_src_align
97
98 // Align to a double word (64 bits).
99 rsb r3, r3, #8 // r3 = 8 - (dst & 7): bytes needed to reach the boundary (1..7)
100 lsls ip, r3, #31 // Z = (bit 0 of r3 is clear), C = bit 1 of r3
101 beq strcpy_align_to_32 // bit 0 clear: no single-byte step needed
102
103 ldrb r2, [r1], #1
104 strb r2, [r0], #1
105 cbz r2, strcpy_complete
106
107strcpy_align_to_32:
    // ldrb, strb and cbz do not write the flags, so C still holds bit 1 of
    // r3 from the lsls above on both paths into this label.
108 bcc strcpy_align_to_64 // bit 1 clear: no two-byte step needed
109
110 ldrb r2, [r1], #1
111 strb r2, [r0], #1
112 cbz r2, strcpy_complete
113 ldrb r2, [r1], #1
114 strb r2, [r0], #1
115 cbz r2, strcpy_complete
116
strcpy_align_to_64:
    // Last step of aligning dst (r0) to a double word. r3 holds the number
    // of bytes that were needed to reach the 8-byte boundary; bit 2 set
    // means four more bytes must be copied before dst is aligned.
    tst     r3, #4
    beq     strcpy_check_src_align

    // Copy the four bytes one at a time. Nothing is known about the
    // alignment of src (r1) at this point, so a 32-bit load could straddle
    // a page boundary and read beyond the terminating zero byte into a page
    // that is not mapped. Byte loads stop exactly at the terminator.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete         // terminator stored: return
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, strcpy_complete
    // Falls through to strcpy_check_src_align with dst 8-byte aligned.

127
128strcpy_check_src_align:
129 // At this point dst is aligned to a double word, check if src
130 // is also aligned to a double word.
131 ands r3, r1, #7 // r3 = src & 7; a non-zero value is the index used by the tbb at strcpy_unaligned_copy
132 bne strcpy_unaligned_copy
133
134 .p2align 2
135strcpy_mainloop:
    // src and dst are both 8-byte aligned here. Each pass loads two words,
    // tests each for a zero byte, and stores them as a pair only when
    // neither contains one; otherwise a tail routine stores the final bytes.
136 ldrd r2, r3, [r1], #8
137
138 pld [r1, #64]
139
    // Zero-byte test: (w - 0x01010101) & ~w & 0x80808080 is non-zero
    // exactly when some byte of w is zero. The mask is left in ip for the
    // tail routine.
140 sub ip, r2, #0x01010101
141 bic ip, ip, r2
142 ands ip, ip, #0x80808080
143 bne strcpy_zero_in_first_register
144
    // Same test on the second word.
145 sub ip, r3, #0x01010101
146 bic ip, ip, r3
147 ands ip, ip, #0x80808080
148 bne strcpy_zero_in_second_register
149
150 strd r2, r3, [r0], #8
151 b strcpy_mainloop
152
153strcpy_complete:
    // Common exit for the byte-wise alignment steps, reached once the
    // terminating zero has been stored.
154 m_pop
155
156strcpy_zero_in_first_register:
    // In: r2 = word that contains the terminator, ip = its zero-byte mask
    // (bit 7 of a byte lane set for the first zero byte; lanes above it may
    // also be set), r0 = where the word belongs in dst.
    // Stores the bytes of r2 up to and including the terminator, then returns.
157 lsls lr, ip, #17 // result keeps mask bits 0-14; C = mask bit 15
158 bne strcpy_copy1byte // mask bit 7 set: byte 0 is the terminator
159 bcs strcpy_copy2bytes // mask bit 15 set: byte 1 is the terminator
160 lsls ip, ip, #1 // result keeps mask bits 0-30, dropping bit 31
161 bne strcpy_copy3bytes // mask bit 23 set: byte 2 is the terminator
162
    // Otherwise byte 3 is the terminator: fall through and store the word.
163strcpy_copy4bytes:
164 // Copy 4 bytes to the destination.
165 str r2, [r0]
166 m_pop
167
168strcpy_copy1byte:
169 strb r2, [r0]
170 m_pop
171
172strcpy_copy2bytes:
173 strh r2, [r0]
174 m_pop
175
176strcpy_copy3bytes:
177 strh r2, [r0], #2 // bytes 0 and 1
178 lsr r2, #16 // bring byte 2 down to the low byte
179 strb r2, [r0]
180 m_pop
181
182strcpy_zero_in_second_register:
    // In: r2 = first word (no zero byte), r3 = second word containing the
    // terminator, ip = zero-byte mask of r3, r0 = where the pair belongs in dst.
    // Stores r2 plus the bytes of r3 up to and including the terminator,
    // then returns.
183 lsls lr, ip, #17 // result keeps mask bits 0-14; C = mask bit 15
184 bne strcpy_copy5bytes // mask bit 7 set: byte 0 of r3 is the terminator
185 bcs strcpy_copy6bytes // mask bit 15 set: byte 1 of r3 is the terminator
186 lsls ip, ip, #1 // result keeps mask bits 0-30, dropping bit 31
187 bne strcpy_copy7bytes // mask bit 23 set: byte 2 of r3 is the terminator
188
189 // Copy 8 bytes to the destination.
190 strd r2, r3, [r0]
191 m_pop
192
193strcpy_copy5bytes:
194 str r2, [r0], #4
195 strb r3, [r0]
196 m_pop
197
198strcpy_copy6bytes:
199 str r2, [r0], #4
200 strh r3, [r0]
201 m_pop
202
203strcpy_copy7bytes:
204 str r2, [r0], #4
205 strh r3, [r0], #2 // bytes 0 and 1 of r3
206 lsr r3, #16 // bring byte 2 of r3 down to the low byte
207 strb r3, [r0]
208 m_pop
209
210strcpy_unaligned_copy:
211 // Dst is aligned to a double word, while src is at an unknown alignment.
212 // There are 7 different versions of the unaligned copy code
213 // to prevent overreading the src. The mainloop of every single version
214 // will store 64 bits per loop. The difference is how much of src can
215 // be read without potentially crossing a page boundary.
    // r3 = src & 7 (1..7), left by the ands at strcpy_check_src_align.
    // tbb reads the byte at pc + r3 and branches forward by twice its
    // value; pc here is the address of the table that follows the
    // instruction. Index k selects strcpy_unalign(8 - k).
216 tbb [pc, r3]
217strcpy_unaligned_branchtable:
218 .byte 0 // index 0 is never used: r3 is non-zero on this path
219 .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
220 .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
221 .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
222 .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
223 .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
224 .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
225 .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
226
227 .p2align 2
228 // Can read 7 bytes before possibly crossing a page.
229strcpy_unalign7:
    // Selected by branch-table index 1 (src & 7 == 1). Each pass copies
    // 8 bytes: a word load for bytes 0-3, byte probes for bytes 4-6, and a
    // second word load for bytes 4-7 only once those probes are non-zero.
230 ldr r2, [r1], #4
231
232 sub ip, r2, #0x01010101
233 bic ip, ip, r2
234 ands ip, ip, #0x80808080
235 bne strcpy_zero_in_first_register
236
237 ldrb r3, [r1]
238 cbz r3, strcpy_unalign7_copy5bytes
239 ldrb r4, [r1, #1]
240 cbz r4, strcpy_unalign7_copy6bytes
241 ldrb r5, [r1, #2]
242 cbz r5, strcpy_unalign7_copy7bytes
243
244 ldr r3, [r1], #4
245 pld [r1, #64]
246
    // Bytes 4-6 are known to be non-zero, so only the top byte of r3 needs
    // testing. strd does not write the flags, so the beq still sees the
    // result of the lsrs.
247 lsrs ip, r3, #24 // Z set when byte 7 is the terminator
248 strd r2, r3, [r0], #8
249 beq strcpy_unalign_return
250 b strcpy_unalign7
251
252strcpy_unalign7_copy5bytes:
253 str r2, [r0], #4
254 strb r3, [r0]
255strcpy_unalign_return:
256 m_pop
257
258strcpy_unalign7_copy6bytes:
259 str r2, [r0], #4
260 strb r3, [r0], #1
261 strb r4, [r0], #1 // the final increment of r0 is discarded: m_pop reloads r0
262 m_pop
263
264strcpy_unalign7_copy7bytes:
265 str r2, [r0], #4
266 strb r3, [r0], #1
267 strb r4, [r0], #1
268 strb r5, [r0], #1 // the final increment of r0 is discarded: m_pop reloads r0
269 m_pop
270
271 .p2align 2
272 // Can read 6 bytes before possibly crossing a page.
273strcpy_unalign6:
    // Selected by branch-table index 2 (src & 7 == 2). Each pass copies
    // 8 bytes: a word load for bytes 0-3, byte probes for bytes 4 and 5,
    // and a second word load for bytes 4-7 only once both are non-zero.
274 ldr r2, [r1], #4
275
276 sub ip, r2, #0x01010101
277 bic ip, ip, r2
278 ands ip, ip, #0x80808080
279 bne strcpy_zero_in_first_register
280
281 ldrb r4, [r1]
282 cbz r4, strcpy_unalign_copy5bytes
283 ldrb r5, [r1, #1]
284 cbz r5, strcpy_unalign_copy6bytes
285
286 ldr r3, [r1], #4
287 pld [r1, #64]
288
    // Bytes 4 and 5 are known to be non-zero; test bytes 6 and 7 of the pair.
289 tst r3, #0xff0000 // byte 6
290 beq strcpy_copy7bytes
291 lsrs ip, r3, #24 // Z set when byte 7 is the terminator
292 strd r2, r3, [r0], #8 // does not write the flags
293 beq strcpy_unalign_return
294 b strcpy_unalign6
295
296 .p2align 2
297 // Can read 5 bytes before possibly crossing a page.
298strcpy_unalign5:
    // Selected by branch-table index 3 (src & 7 == 3). Each pass copies
    // 8 bytes: a word load for bytes 0-3, a byte probe for byte 4, and a
    // second word load for bytes 4-7 only once that byte is non-zero.
299 ldr r2, [r1], #4
300
301 sub ip, r2, #0x01010101
302 bic ip, ip, r2
303 ands ip, ip, #0x80808080
304 bne strcpy_zero_in_first_register
305
306 ldrb r4, [r1]
307 cbz r4, strcpy_unalign_copy5bytes
308
309 ldr r3, [r1], #4
310
311 pld [r1, #64]
312
313 sub ip, r3, #0x01010101
314 bic ip, ip, r3
315 ands ip, ip, #0x80808080
316 bne strcpy_zero_in_second_register
317
318 strd r2, r3, [r0], #8
319 b strcpy_unalign5
320
    // Tails used by strcpy_unalign5 and strcpy_unalign6: r2 holds bytes
    // 0-3, r4 holds byte 4 and, for the six-byte case, r5 holds byte 5.
321strcpy_unalign_copy5bytes:
322 str r2, [r0], #4
323 strb r4, [r0]
324 m_pop
325
326strcpy_unalign_copy6bytes:
327 str r2, [r0], #4
328 strb r4, [r0], #1
329 strb r5, [r0]
330 m_pop
331
332 .p2align 2
333 // Can read 4 bytes before possibly crossing a page.
334strcpy_unalign4:
    // Selected by branch-table index 4 (src & 7 == 4). Each pass loads two
    // words separately, tests each for a zero byte as soon as it is loaded,
    // and stores them as a pair when neither contains one.
335 ldr r2, [r1], #4
336
337 sub ip, r2, #0x01010101
338 bic ip, ip, r2
339 ands ip, ip, #0x80808080
340 bne strcpy_zero_in_first_register
341
342 ldr r3, [r1], #4
343 pld [r1, #64]
344
345 sub ip, r3, #0x01010101
346 bic ip, ip, r3
347 ands ip, ip, #0x80808080
348 bne strcpy_zero_in_second_register
349
350 strd r2, r3, [r0], #8
351 b strcpy_unalign4
352
353 .p2align 2
354 // Can read 3 bytes before possibly crossing a page.
355strcpy_unalign3:
    // Selected by branch-table index 5 (src & 7 == 5). Bytes 0-2 are probed
    // one at a time; only when all three are non-zero are the two words for
    // this pass loaded.
356 ldrb r2, [r1]
357 cbz r2, strcpy_unalign3_copy1byte
358 ldrb r3, [r1, #1]
359 cbz r3, strcpy_unalign3_copy2bytes
360 ldrb r4, [r1, #2]
361 cbz r4, strcpy_unalign3_copy3bytes
362
363 ldr r2, [r1], #4
364 ldr r3, [r1], #4
365
366 pld [r1, #64]
367
    // Bytes 0-2 of r2 are known to be non-zero, so only its top byte is tested.
368 lsrs lr, r2, #24 // Z set when byte 3 is the terminator
369 beq strcpy_copy4bytes
370
371 sub ip, r3, #0x01010101
372 bic ip, ip, r3
373 ands ip, ip, #0x80808080
374 bne strcpy_zero_in_second_register
375
376 strd r2, r3, [r0], #8
377 b strcpy_unalign3
378
    // Tails for a terminator found by the byte probes: r2, r3 and r4 hold
    // bytes 0, 1 and 2 respectively.
379strcpy_unalign3_copy1byte:
380 strb r2, [r0]
381 m_pop
382
383strcpy_unalign3_copy2bytes:
384 strb r2, [r0], #1
385 strb r3, [r0]
386 m_pop
387
388strcpy_unalign3_copy3bytes:
389 strb r2, [r0], #1
390 strb r3, [r0], #1
391 strb r4, [r0]
392 m_pop
393
394 .p2align 2
395 // Can read 2 bytes before possibly crossing a page.
396strcpy_unalign2:
    // Selected by branch-table index 6 (src & 7 == 6). Bytes 0 and 1 are
    // probed one at a time; only when both are non-zero are the two words
    // for this pass loaded.
397 ldrb r2, [r1]
398 cbz r2, strcpy_unalign_copy1byte
399 ldrb r4, [r1, #1]
400 cbz r4, strcpy_unalign_copy2bytes
401
402 ldr r2, [r1], #4
403 ldr r3, [r1], #4
404 pld [r1, #64]
405
    // Bytes 0 and 1 of r2 are known to be non-zero; test bytes 2 and 3.
406 tst r2, #0xff0000 // byte 2
407 beq strcpy_copy3bytes
408 lsrs ip, r2, #24 // Z set when byte 3 is the terminator
409 beq strcpy_copy4bytes
410
411 sub ip, r3, #0x01010101
412 bic ip, ip, r3
413 ands ip, ip, #0x80808080
414 bne strcpy_zero_in_second_register
415
416 strd r2, r3, [r0], #8
417 b strcpy_unalign2
418
419 .p2align 2
420 // Can read 1 byte before possibly crossing a page.
421strcpy_unalign1:
    // Selected by branch-table index 7 (src & 7 == 7). Byte 0 is probed on
    // its own; only when it is non-zero are the two words for this pass
    // loaded and tested with the zero-byte mask.
422 ldrb r2, [r1]
423 cbz r2, strcpy_unalign_copy1byte
424
425 ldr r2, [r1], #4
426 ldr r3, [r1], #4
427
428 pld [r1, #64]
429
430 sub ip, r2, #0x01010101
431 bic ip, ip, r2
432 ands ip, ip, #0x80808080
433 bne strcpy_zero_in_first_register
434
435 sub ip, r3, #0x01010101
436 bic ip, ip, r3
437 ands ip, ip, #0x80808080
438 bne strcpy_zero_in_second_register
439
440 strd r2, r3, [r0], #8
441 b strcpy_unalign1
442
    // Tails used by strcpy_unalign1 and strcpy_unalign2: r2 holds byte 0
    // and, for the two-byte case, r4 holds byte 1.
443strcpy_unalign_copy1byte:
444 strb r2, [r0]
445 m_pop
446
447strcpy_unalign_copy2bytes:
448 strb r2, [r0], #1
449 strb r4, [r0]
450 m_pop
451END(strcpy)