blob: 4e624d43a0afc98f3cd19308f94752ab5486ca49 [file] [log] [blame]
Christopher Ferris7c83a1e2013-02-26 01:30:00 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <machine/cpu-features.h>
30#include <machine/asm.h>
Christopher Ferris59a13c12013-08-01 13:13:33 -070031#include "libc_events.h"
Christopher Ferris7c83a1e2013-02-26 01:30:00 -080032
/*
 * Build assumptions for the code in this file: the target processor
 * implements the complete ARMv7 instruction set, provides NEON, and has
 * cache lines that are 32 bytes long.
 */

/* Cache line length in bytes; the pld offsets in memcpy are multiples of it. */
#define CACHE_LINE_SIZE 32

        .text
        /* Let the assembler accept NEON mnemonics (vld1/vst1). */
        .fpu    neon
43
/*
 * __memcpy_chk: checked entry point placed directly in front of memcpy.
 *
 * On entry:
 *   r0, r1 = not examined here; they are left untouched for memcpy
 *            (destination and source respectively).
 *   r2     = number of bytes to copy.
 *   r3     = limit that r2 must not exceed (presumably the size of the
 *            destination buffer; the failure message is
 *            "memcpy buffer overflow").
 *
 * If r2 is above r3 control goes to fortify_check_failed; otherwise
 * execution falls through into memcpy with r0-r2 unchanged.
 *
 * Both operands are byte counts and therefore unsigned, so the branch
 * must use an unsigned condition (hi).  A signed condition (gt) would
 * treat a count with bit 31 set, e.g. 0xffffffff, as negative and let it
 * pass, and would treat a limit with bit 31 set as negative and reject
 * every ordinary count.
 */
ENTRY(__memcpy_chk)
        cmp         r2, r3
        bhi         fortify_check_failed    // unsigned: count > limit

        // Fall through to memcpy...
END(__memcpy_chk)
50
/*
 * memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * On entry:  r0 = destination, r1 = source, r2 = byte count.
 * On return: r0 = the destination pointer that was passed in (it is
 *            reloaded from the stack on both exit paths).
 * Scratch:   r1-r3, ip (r12), lr, d0-d7 and the flags.  r4-r8 are used
 *            only by the ARM-only path, which saves and restores them.
 *
 * Strategy:
 *   - fewer than 16 bytes: go straight to the tail copy (first label 5).
 *   - source and destination have the same alignment modulo 4:
 *     ARM-only ldm/stm loop (label 11).
 *   - otherwise: NEON loop.  For 224 bytes or more the destination is
 *     first brought to a 16 byte boundary.
 *
 * __memcpy_chk falls through into this entry point.  The
 * fortify_check_failed stub it branches to sits at the end of this
 * function, inside the same ENTRY/END pair.
 *
 * Numeric labels are reused; "Nf"/"Nb" always mean the nearest label N
 * forwards/backwards.
 */
ENTRY(memcpy)
        .save       {r0, lr}
        /* start preloading as early as possible */
        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        stmfd       sp!, {r0, lr}           // r0 is the return value; lr becomes scratch
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        // Make sure the count is at least 16 bytes, needed for alignment code.
        cmp         r2, #16
        blo         5f


        /* check if buffers are aligned. If so, run arm-only version */
        eor         r3, r0, r1
        ands        r3, r3, #0x3            // zero when r0 and r1 agree modulo 4
        beq         11f

        /* Check the upper size limit for Neon unaligned memory access in memcpy */
        cmp         r2, #224
        blo         3f

        /* align destination to 16 bytes for the write-buffer */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF            // r3 = bytes needed to reach a 16 byte boundary
        beq         3f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
        movs        ip, r3, lsl #31         // N = bit 0 of r3, C = bit 1 of r3
        ldrmib      lr, [r1], #1
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
        movs        ip, r3, lsl #29         // N = bit 2 of r3, C = bit 3 of r3
        bge         1f
        // copies 4 bytes, destination 32-bits aligned
        vld1.32     {d0[0]}, [r1]!
        vst1.32     {d0[0]}, [r0, :32]!
1:      bcc         2f                      // flags still come from the movs above
        // copies 8 bytes, destination 64-bits aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:
        /* preload immediately the next cache line, which we may need */
        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
3:
        /* make sure we have at least 64 bytes to copy */
        subs        r2, r2, #64
        blo         2f

        /* preload all the cache lines we need */
        pld         [r1, #(CACHE_LINE_SIZE * 4)]
        pld         [r1, #(CACHE_LINE_SIZE * 6)]

1:      /* The main loop copies 64 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 6)]
        subs        r2, r2, #64
        vst1.8      {d0 - d3}, [r0]!
        vst1.8      {d4 - d7}, [r0]!
        bhs         1b                      // flags from the subs; the stores leave them alone

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #64
        subs        r2, r2, #32
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0]!
        bhs         3b

4:      /* less than 32 left */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        // copies 16 bytes (no alignment qualifier on these accesses)
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0]!
5:      /* copy up to 15-bytes (count in r2) */
        movs        ip, r2, lsl #29         // C = bit 3 of r2, N = bit 2 of r2
        bcc         1f
        vld1.8      {d0}, [r1]!             // 8 bytes
        vst1.8      {d0}, [r0]!
1:      bge         2f
        vld1.32     {d0[0]}, [r1]!          // 4 bytes
        vst1.32     {d0[0]}, [r0]!
2:      movs        ip, r2, lsl #31         // N = bit 0 of r2, C = bit 1 of r2
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        ldmfd       sp!, {r0, lr}           // r0 = original destination
        bx          lr
11:
        /* Simple arm-only copy loop to handle aligned copy operations */
        stmfd       sp!, {r4, r5, r6, r7, r8}
        pld         [r1, #(CACHE_LINE_SIZE * 4)]

        /* Check alignment */
        rsb         r3, r1, #0
        ands        r3, #3                  // r3 = bytes needed to word-align the source
        beq         2f

        /* align source to 32 bits. We need to insert 2 instructions between
         * a ldr[b|h] and str[b|h] because byte and half-word instructions
         * stall 2 cycles.
         * r0 and r1 agree modulo 4 on this path, so the destination becomes
         * word aligned at the same time.
         */
        movs        r12, r3, lsl #31        // N = bit 0 of r3, C = bit 1 of r3
        sub         r2, r2, r3              /* r3 <= 3 and r2 >= 16 here, so this cannot underflow */
        ldrmib      r3, [r1], #1
        ldrcsb      r4, [r1], #1
        ldrcsb      r5, [r1], #1
        strmib      r3, [r0], #1
        strcsb      r4, [r0], #1
        strcsb      r5, [r0], #1

2:
        subs        r2, r2, #64
        blt         4f

3:      /* Main copy loop, copying 64 bytes at a time */
        pld         [r1, #(CACHE_LINE_SIZE * 8)]
        ldmia       r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
        stmia       r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
        ldmia       r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
        stmia       r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
        subs        r2, r2, #64
        bge         3b

4:      /* Check if there are >= 32 bytes left */
        adds        r2, r2, #64
        subs        r2, r2, #32
        blt         5f

        /* Copy 32 bytes */
        ldmia       r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
        stmia       r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
        subs        r2, #32

5:      /* Handle any remaining bytes */
        adds        r2, #32
        beq         6f

        movs        r12, r2, lsl #28        // C = bit 4 of r2, N = bit 3 of r2
        ldmcsia     r1!, {r3, r4, r5, r6}   /* 16 bytes */
        ldmmiia     r1!, {r7, r8}           /*  8 bytes */
        stmcsia     r0!, {r3, r4, r5, r6}
        stmmiia     r0!, {r7, r8}
        movs        r12, r2, lsl #30        // C = bit 2 of r2, N = bit 1 of r2
        ldrcs       r3, [r1], #4            /*  4 bytes */
        ldrmih      r4, [r1], #2            /*  2 bytes */
        strcs       r3, [r0], #4
        strmih      r4, [r0], #2
        tst         r2, #0x1
        ldrneb      r3, [r1]                /*  last byte */
        strneb      r3, [r0]
6:
        ldmfd       sp!, {r4, r5, r6, r7, r8}
        ldmfd       sp!, {r0, pc}           // r0 = original destination; return


        // Only reached when the __memcpy_chk check fails.
fortify_check_failed:
        // __memcpy_chk branches here before the "stmfd sp!, {r0, lr}" at the
        // top of memcpy has executed.  Push the same pair now so that the
        // stack matches the ".save {r0, lr}" annotation covering this
        // function, and so that the caller's return address is still
        // available after the "bl" below overwrites lr.
        stmfd       sp!, {r0, lr}
        ldr         r0, error_message       // r0 = error_string - (1b + 8)
        ldr         r1, error_code
1:
        add         r0, pc                  // pc reads as 1b + 8, so r0 = &error_string
        bl          __fortify_chk_fail      // not expected to return: literal words follow
error_code:
        .word       BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(memcpy)
Christopher Ferris59a13c12013-08-01 13:13:33 -0700233
/*
 * NUL-terminated message for the failed __memcpy_chk check.  The
 * fortify_check_failed stub in memcpy computes this label's address
 * PC-relatively and passes it in r0 to __fortify_chk_fail.
 */
        .data
error_string:
        .asciz      "memcpy buffer overflow"