/* Source blob: 068f2f60cbd4c88181699b3b8090a4b72b6b7af6 */
/***************************************************************************
 Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
     * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
     * Neither the name of The Linux Foundation nor the names of its contributors may
       be used to endorse or promote products derived from this software
       without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/* Assumes neon instructions and a cache line size of 64 bytes. */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * These default settings are good for all Krait-based systems
 * as of this writing, but they can be overridden in:
 *   device/<vendor>/<board>/BoardConfig.mk
 * by setting the following:
 *   TARGET_USE_KRAIT_BIONIC_OPTIMIZATION := true
 *   TARGET_USE_KRAIT_PLD_SET := true
 *   TARGET_KRAIT_BIONIC_PLDOFFS := <pldoffset>
 *   TARGET_KRAIT_BIONIC_PLDSIZE := <pldsize>
 *   TARGET_KRAIT_BIONIC_PLDTHRESH := <pldthreshold>
 *   TARGET_KRAIT_BIONIC_BBTHRESH := <bbthreshold>
 *
 * How the copy routine in this file uses each value:
 *   PLDOFFS   - look-ahead distance, in units of PLDSIZE bytes; it only
 *               appears as the byte offset (PLDOFFS * PLDSIZE) and as a
 *               count of 64-byte blocks.
 *   PLDSIZE   - bytes per PLDOFFS unit.
 *   PLDTHRESH - copies of at most this many 64-byte blocks use the plain
 *               loop that issues no look-ahead accesses.
 *   BBTHRESH  - copies of at most this many 64-byte blocks skip the
 *               look-ahead distance computation that depends on the
 *               destination and source addresses.
 */

#ifndef PLDOFFS
#define PLDOFFS         (10)
#endif
#ifndef PLDTHRESH
#define PLDTHRESH       (PLDOFFS)
#endif
#ifndef BBTHRESH
#define BBTHRESH        (4096/64)
#endif
#ifndef PLDSIZE
#define PLDSIZE         (64)
#endif

#if (PLDOFFS < 1)
#error Routine does not support offsets less than 1
#endif
#if (PLDTHRESH < PLDOFFS)
#error PLD threshold must be greater than or equal to the PLD offset
#endif
/*
 * The .Lneon_prime_pump path removes PLDOFFS blocks from its main loop and
 * then hands (PLDOFFS * PLDSIZE) >> 6 blocks to the final loop.  The two
 * counts must agree, otherwise the routine copies the wrong number of bytes.
 */
#if (((PLDOFFS) * (PLDSIZE)) / 64 != (PLDOFFS))
#error PLDOFFS*PLDSIZE must cover exactly PLDOFFS blocks of 64 bytes (use PLDSIZE 64)
#endif

        .text
        .fpu    neon

/*
 * MEMCPY_BASE / MEMCPY_BASE_ALIGNED
 *
 * Copies r2 bytes from [r1] to [r0] using NEON loads and stores.
 *
 * On entry:
 *   r0 = destination pointer (advanced as data is stored)
 *   r1 = source pointer (advanced as data is loaded)
 *   r2 = byte count, treated as unsigned
 *
 * Stack: the exit path pops {r0, lr} although this code never pushes them,
 *   and .save lists {r0, r9, r10, lr}.
 *   NOTE(review): the matching push of {r0, lr} is expected to be done by
 *   the code that enters here -- confirm against the file that includes
 *   this one.
 *
 * Registers written: r0-r3, r12, lr, q0-q3, q8-q11 and the flags.
 *   r9 and r10 are pushed before use and popped again.
 *
 * Structure:
 *   1. Dispatch on the byte count to the smallest tail that can handle it.
 *   2. For 64 bytes or more, copy whole 64-byte blocks.  Large copies run
 *      loops that also touch the source a fixed distance ahead (pld or a
 *      dummy ldr into r3); the last lr >> 6 blocks always go through the
 *      plain loop so the look-ahead address stays inside the source.
 *   3. Copy the remaining 0..63 bytes by testing the low bits of r2.
 */
ENTRY(MEMCPY_BASE)
MEMCPY_BASE_ALIGNED:
        // .cfi_startproc
        .save       {r0, r9, r10, lr}
        // .cfi_def_cfa_offset 8
        //.cfi_rel_offset r0, 0
        //.cfi_rel_offset lr, 4

        /*
         * Size dispatch.  The branches are unsigned (blo) because r2 is a
         * byte count: with a signed branch a count that has bit 31 set
         * would be taken for a size below 4 and almost nothing copied.
         */
        cmp         r2, #4
        blo         .Lneon_lt4
        cmp         r2, #16
        blo         .Lneon_lt16
        cmp         r2, #32
        blo         .Lneon_16               // taken with N set (r2 - 32 < 0); .Lneon_16 tests N
        cmp         r2, #64
        blo         .Lneon_copy_32_a

        mov         r12, r2, lsr #6         // r12 = number of whole 64-byte blocks
        cmp         r12, #PLDTHRESH
        ble         .Lneon_copy_64_loop_nopld

        push        {r9, r10}
        .cfi_adjust_cfa_offset 8
        .cfi_rel_offset r9, 0
        .cfi_rel_offset r10, 4

        cmp         r12, #BBTHRESH
        ble         .Lneon_prime_pump

        /*
         * lr = ((dst + 0x400 - (src + PLDOFFS*PLDSIZE)) & 0x7FF)
         *      + PLDOFFS*PLDSIZE
         * This becomes the look-ahead distance in bytes.
         * NOTE(review): presumably chosen to keep the look-ahead address
         * at a fixed offset from the destination -- confirm the intent.
         */
        add         lr, r0, #0x400
        add         r9, r1, #(PLDOFFS*PLDSIZE)
        sub         lr, lr, r9
        lsl         lr, lr, #21             // shift pair keeps only the low 11 bits
        lsr         lr, lr, #21
        add         lr, lr, #(PLDOFFS*PLDSIZE)
        cmp         r12, lr, lsr #6
        ble         .Lneon_prime_pump       // copy no longer than the look-ahead

        itt         gt
        movgt       r9, #(PLDOFFS)
        rsbsgt      r9, r9, lr, lsr #6      // r9 = (lr >> 6) - PLDOFFS, sets flags
        ble         .Lneon_prime_pump

        add         r10, r1, lr
        bic         r10, #0x3F              // r10 = (src + lr) rounded down to 64 bytes

        sub         r12, r12, lr, lsr #6    // keep the last lr >> 6 blocks for the plain loop

        /* Split what is left: r9 blocks for the first loop, r12 for the next. */
        cmp         r9, r12
        itee        le
        suble       r12, r12, r9
        movgt       r9, r12                 // fewer than r9 blocks left: first loop takes all
        movgt       r12, #0

        pld         [r1, #((PLDOFFS-1)*PLDSIZE)]
.Lneon_copy_64_loop_outer_doublepld:
        /* r9 blocks: pld at the fixed offset plus a dummy load at r10. */
        pld         [r1, #((PLDOFFS)*PLDSIZE)]
        vld1.32     {q0, q1}, [r1]!
        vld1.32     {q2, q3}, [r1]!
        ldr         r3, [r10]               // loaded value is never used
        subs        r9, r9, #1
        vst1.32     {q0, q1}, [r0]!
        vst1.32     {q2, q3}, [r0]!
        add         r10, #64
        bne         .Lneon_copy_64_loop_outer_doublepld
        cmp         r12, #0
        beq         .Lneon_pop_before_nopld

        cmp         r12, #(512*1024/64)     // 8192 blocks = 512 KiB
        blt         .Lneon_copy_64_loop_outer

.Lneon_copy_64_loop_ddr:
        /* r12 blocks (at least 512 KiB): pld at r10 instead of a load. */
        vld1.32     {q0, q1}, [r1]!
        vld1.32     {q2, q3}, [r1]!
        pld         [r10]
        subs        r12, r12, #1
        vst1.32     {q0, q1}, [r0]!
        vst1.32     {q2, q3}, [r0]!
        add         r10, #64
        bne         .Lneon_copy_64_loop_ddr
        b           .Lneon_pop_before_nopld

.Lneon_prime_pump:
        /* Fixed look-ahead of PLDOFFS*PLDSIZE bytes. */
        mov         lr, #(PLDOFFS*PLDSIZE)
        add         r10, r1, #(PLDOFFS*PLDSIZE)
        bic         r10, #0x3F
        sub         r12, r12, #PLDOFFS      // last PLDOFFS blocks go to the plain loop
        ldr         r3, [r10, #(-1*PLDSIZE)]
.Lneon_copy_64_loop_outer:
        /* r12 blocks: dummy load at r10, value in r3 is never used. */
        vld1.32     {q0, q1}, [r1]!
        vld1.32     {q2, q3}, [r1]!
        ldr         r3, [r10]
        subs        r12, r12, #1
        vst1.32     {q0, q1}, [r0]!
        vst1.32     {q2, q3}, [r0]!
        add         r10, #64
        bne         .Lneon_copy_64_loop_outer
.Lneon_pop_before_nopld:
        mov         r12, lr, lsr #6         // blocks held back for the plain loop
        pop         {r9, r10}
        .cfi_restore r9
        .cfi_restore r10
        .cfi_adjust_cfa_offset -8

.Lneon_copy_64_loop_nopld:
        /* r12 blocks of 64 bytes with no look-ahead access. */
        vld1.32     {q8, q9}, [r1]!
        vld1.32     {q10, q11}, [r1]!
        subs        r12, r12, #1
        vst1.32     {q8, q9}, [r0]!
        vst1.32     {q10, q11}, [r0]!
        bne         .Lneon_copy_64_loop_nopld
        ands        r2, r2, #0x3f           // r2 = bytes left after the 64-byte blocks
        .cfi_restore r0
        .cfi_adjust_cfa_offset -4
        beq         .Lneon_exit
.Lneon_copy_32_a:
        movs        r3, r2, lsl #27         // C = bit 5 of r2 (32), N = bit 4 of r2 (16)
        bcc         .Lneon_16
        vld1.32     {q0,q1}, [r1]!
        vst1.32     {q0,q1}, [r0]!
.Lneon_16:
        /* N comes from the movs above or from the cmp r2, #32 at entry. */
        bpl         .Lneon_lt16
        vld1.32     {q8}, [r1]!
        vst1.32     {q8}, [r0]!
        ands        r2, r2, #0x0f
        beq         .Lneon_exit
.Lneon_lt16:
        movs        r3, r2, lsl #29         // C = bit 3 of r2 (8), N = bit 2 of r2 (4)
        itttt       cs
        ldrcs       r3, [r1], #4
        strcs       r3, [r0], #4
        ldrcs       r3, [r1], #4
        strcs       r3, [r0], #4
        itt         mi
        ldrmi       r3, [r1], #4
        strmi       r3, [r0], #4
.Lneon_lt4:
        movs        r2, r2, lsl #31         // C = bit 1 of r2 (2), N = bit 0 of r2 (1)
        itt         cs
        ldrhcs      r3, [r1], #2
        strhcs      r3, [r0], #2
        itt         mi
        ldrbmi      r3, [r1]
        strbmi      r3, [r0]
.Lneon_exit:
        pop         {r0, lr}                // pair pushed outside this block (see header)
        bx          lr
        //.cfi_endproc
END(MEMCPY_BASE)
