Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * linux/arch/arm/lib/memcpy.S |
| 3 | * |
| 4 | * Copyright (C) 1995-1999 Russell King |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License version 2 as |
| 8 | * published by the Free Software Foundation. |
| 9 | * |
| 10 | * ASM optimised string functions |
| 11 | */ |
| 12 | #include <linux/linkage.h> |
| 13 | #include <asm/assembler.h> |
| 14 | |
| 15 | .text |
| 16 | |
/*
 * ENTER: function prologue.
 * Copies the incoming sp into ip, pushes r0, r4-r9, fp, ip (the old sp),
 * lr and pc with a full-descending store-multiple, then sets fp to the
 * old sp minus 4.  r0 is part of the saved set so that EXIT/EXITEQ, whose
 * register lists also name r0, can reload it.
 */
| 17 | #define ENTER \ |
| 18 | mov ip,sp ;\ |
| 19 | stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\ |
| 20 | sub fp,ip,#4 |
| 21 | |
/*
 * EXIT: function epilogue.
 * Passes base register fp and the list {r0, r4-r9, fp, sp, pc} to
 * LOADREGS, which is defined outside this file.
 * NOTE(review): presumably LOADREGS expands to a load-multiple ("ea"
 * addressing) from the frame built by ENTER, restoring the saved
 * registers and returning by loading pc -- confirm in <asm/assembler.h>.
 */
| 22 | #define EXIT \ |
| 23 | LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc}) |
| 24 | |
/*
 * EXITEQ: same register list and base register as EXIT, but LOADREGS is
 * given "eqea" instead of "ea".
 * NOTE(review): presumably this makes the epilogue conditional on the Z
 * flag (return only when the preceding flag-setting instruction produced
 * zero) -- LOADREGS is defined outside this file, confirm there.
 */
| 25 | #define EXITEQ \ |
| 26 | LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc}) |
| 27 | |
| 28 | /* |
| 29 | * Prototype: void *memcpy(void *to, const void *from, unsigned long n); |
| 30 | */ |
/*
 * memcpy / memmove: both entry points label the same routine.
 *
 * Register use, as read from the code below:
 *   r0        destination pointer (base of every store)
 *   r1        source pointer (base of every load)
 *   r2        byte count, kept biased below the true remaining count so
 *             that one subs/adds both updates it and sets the flags for
 *             the following conditional instructions
 *   r3-r9, ip data scratch
 *
 * If the source address is below the destination (unsigned compare) the
 * copy runs backwards from the end of both buffers (label 23 onwards);
 * otherwise it runs forwards.  Each direction has three phases: byte
 * copies to word-align the destination, a bulk word copy (plain when the
 * source is also aligned, shift-and-merge when it is not), and a 0..3
 * byte tail.
 *
 * PLD(), pull and push are defined outside this file.
 * NOTE(review): presumably PLD() emits its argument only when the CPU has
 * the pld prefetch instruction, and pull/push are endian-dependent
 * logical shifts -- confirm in <asm/assembler.h>.
 */
| 31 | ENTRY(memcpy) |
| 32 | ENTRY(memmove) |
| 33 | ENTER |
/* src < dst (unsigned): take the backward copy at 23 */
| 34 | cmp r1, r0 |
| 35 | bcc 23f |
/* ---- forward copy ---- */
/* r2 = n - 4; fewer than 4 bytes goes straight to the byte tail at 6 */
| 36 | subs r2, r2, #4 |
| 37 | blt 6f |
| 38 | PLD( pld [r1, #0] ) |
/* dst not word-aligned: 7 copies bytes until it is */
| 39 | ands ip, r0, #3 |
| 40 | bne 7f |
/* dst aligned but src not: 8 does the shifted copy, with ip = src & 3 */
| 41 | ands ip, r1, #3 |
| 42 | bne 8f |
| 43 | |
/*
 * 1: both pointers word-aligned; r2 = remaining - 4 on entry.
 * After the two subs r2 = remaining - 32; under 12 bytes left goes to 5,
 * under 32 bytes left goes to 4.
 */
| 44 | 1: subs r2, r2, #8 |
| 45 | blt 5f |
| 46 | subs r2, r2, #20 |
| 47 | blt 4f |
/*
 * PLD-wrapped lines: bias r2 by a further 64 so that loop 2 only runs
 * while at least 96 bytes remain; 3 then copies the last one or two
 * 32-byte groups and leaves r2 = remaining - 32 for label 4.
 */
| 48 | PLD( pld [r1, #28] ) |
| 49 | PLD( subs r2, r2, #64 ) |
| 50 | PLD( blt 3f ) |
/*
 * 2: bulk loop.  Each pass moves 32 bytes, plus a second 32 bytes when
 * the count allows.  The last stmia is unconditional because its data
 * has already been loaded.
 */
| 51 | 2: PLD( pld [r1, #60] ) |
| 52 | PLD( pld [r1, #92] ) |
| 53 | ldmia r1!, {r3 - r9, ip} |
| 54 | subs r2, r2, #32 |
| 55 | stmgeia r0!, {r3 - r9, ip} |
| 56 | ldmgeia r1!, {r3 - r9, ip} |
| 57 | subges r2, r2, #32 |
| 58 | stmia r0!, {r3 - r9, ip} |
| 59 | bge 2b |
| 60 | 3: PLD( ldmia r1!, {r3 - r9, ip} ) |
| 61 | PLD( adds r2, r2, #32 ) |
| 62 | PLD( stmgeia r0!, {r3 - r9, ip} ) |
| 63 | PLD( ldmgeia r1!, {r3 - r9, ip} ) |
| 64 | PLD( subges r2, r2, #32 ) |
| 65 | PLD( stmia r0!, {r3 - r9, ip} ) |
/*
 * 4: r2 = remaining - 32 (negative).  cmn tests r2 + 16 >= 0, i.e. at
 * least 16 bytes left: copy four words.  The adds then makes
 * r2 = remaining - 12; if non-negative copy three more words.
 */
| 66 | 4: cmn r2, #16 |
| 67 | ldmgeia r1!, {r3 - r6} |
| 68 | subge r2, r2, #16 |
| 69 | stmgeia r0!, {r3 - r6} |
| 70 | adds r2, r2, #20 |
| 71 | ldmgeia r1!, {r3 - r5} |
| 72 | subge r2, r2, #12 |
| 73 | stmgeia r0!, {r3 - r5} |
/*
 * 5: r2 = remaining - 12 on entry.  Under 4 bytes left goes to 6;
 * otherwise copy one word (4..7 left) or two words (8..11 left).
 * Either way r2 = remaining - 4 when falling into 6.
 */
| 74 | 5: adds r2, r2, #8 |
| 75 | blt 6f |
| 76 | subs r2, r2, #4 |
| 77 | ldrlt r3, [r1], #4 |
| 78 | ldmgeia r1!, {r4, r5} |
| 79 | subge r2, r2, #4 |
| 80 | strlt r3, [r0], #4 |
| 81 | stmgeia r0!, {r4, r5} |
| 82 | |
/*
 * 6: byte tail.  The adds restores the true count (0..3); EXITEQ handles
 * the zero case.  Otherwise copy one byte, a second if the count
 * is >= 2 and a third if it is > 2, then leave via EXIT.
 */
| 83 | 6: adds r2, r2, #4 |
| 84 | EXITEQ |
| 85 | cmp r2, #2 |
| 86 | ldrb r3, [r1], #1 |
| 87 | ldrgeb r4, [r1], #1 |
| 88 | ldrgtb r5, [r1], #1 |
| 89 | strb r3, [r0], #1 |
| 90 | strgeb r4, [r0], #1 |
| 91 | strgtb r5, [r0], #1 |
| 92 | EXIT |
| 93 | |
/*
 * 7: ip = dst & 3 on entry; rsb turns it into the number of bytes (1..3)
 * needed to word-align dst.  Copy them and deduct them from r2; if that
 * leaves r2 negative finish in the byte tail, otherwise re-test the
 * source alignment (aligned: back to 1, misaligned: fall into 8).
 */
| 94 | 7: rsb ip, ip, #4 |
| 95 | cmp ip, #2 |
| 96 | ldrb r3, [r1], #1 |
| 97 | ldrgeb r4, [r1], #1 |
| 98 | ldrgtb r5, [r1], #1 |
| 99 | strb r3, [r0], #1 |
| 100 | strgeb r4, [r0], #1 |
| 101 | strgtb r5, [r0], #1 |
| 102 | subs r2, r2, ip |
| 103 | blt 6b |
| 104 | ands ip, r1, #3 |
| 105 | beq 1b |
| 106 | |
/*
 * 8: dst is word-aligned, src is ip (1..3) bytes past a word boundary.
 * Round r1 down and preload the containing word into r7; r1 then points
 * at the next whole word.  Every output word is merged from two
 * neighbouring source words using pull/push shifts.  The same code
 * appears three times, once per offset: ip = 1 here, ip = 2 at 13,
 * ip = 3 at 18.
 */
| 107 | 8: bic r1, r1, #3 |
| 108 | ldr r7, [r1], #4 |
| 109 | cmp ip, #2 |
| 110 | bgt 18f |
| 111 | beq 13f |
/* r2 = remaining - 4: fewer than 16 bytes left uses the word loop at 11 */
| 112 | cmp r2, #12 |
| 113 | blt 11f |
| 114 | PLD( pld [r1, #12] ) |
/* r2 = remaining - 16 (less a further 32 on the PLD-wrapped lines) */
| 115 | sub r2, r2, #12 |
| 116 | PLD( subs r2, r2, #32 ) |
| 117 | PLD( blt 10f ) |
| 118 | PLD( pld [r1, #28] ) |
/* 9/10: 16 bytes per pass; r7 carries the leftover bytes between passes */
| 119 | 9: PLD( pld [r1, #44] ) |
| 120 | 10: mov r3, r7, pull #8 |
| 121 | ldmia r1!, {r4 - r7} |
| 122 | subs r2, r2, #16 |
| 123 | orr r3, r3, r4, push #24 |
| 124 | mov r4, r4, pull #8 |
| 125 | orr r4, r4, r5, push #24 |
| 126 | mov r5, r5, pull #8 |
| 127 | orr r5, r5, r6, push #24 |
| 128 | mov r6, r6, pull #8 |
| 129 | orr r6, r6, r7, push #24 |
| 130 | stmia r0!, {r3 - r6} |
| 131 | bge 9b |
/*
 * PLD-wrapped lines: while r2 + 32 >= 0 another 16 bytes remain, so
 * re-enter at 10 (skipping the pld at 9); then remove the extra bias.
 */
| 132 | PLD( cmn r2, #32 ) |
| 133 | PLD( bge 10b ) |
| 134 | PLD( add r2, r2, #32 ) |
/* r2 = remaining - 4; 11 moves one merged word per pass while >= 4 left */
| 135 | adds r2, r2, #12 |
| 136 | blt 12f |
| 137 | 11: mov r3, r7, pull #8 |
| 138 | ldr r7, [r1], #4 |
| 139 | subs r2, r2, #4 |
| 140 | orr r3, r3, r7, push #24 |
| 141 | str r3, [r0], #4 |
| 142 | bge 11b |
/*
 * r1 runs ahead of the bytes actually consumed; step it back 3 so it
 * addresses the next unread source byte, then finish in the byte tail.
 */
| 143 | 12: sub r1, r1, #3 |
| 144 | b 6b |
| 145 | |
/* 13: same as the block above for src offset ip = 2 (shift 16/16) */
| 146 | 13: cmp r2, #12 |
| 147 | blt 16f |
| 148 | PLD( pld [r1, #12] ) |
| 149 | sub r2, r2, #12 |
| 150 | PLD( subs r2, r2, #32 ) |
| 151 | PLD( blt 15f ) |
| 152 | PLD( pld [r1, #28] ) |
| 153 | 14: PLD( pld [r1, #44] ) |
| 154 | 15: mov r3, r7, pull #16 |
| 155 | ldmia r1!, {r4 - r7} |
| 156 | subs r2, r2, #16 |
| 157 | orr r3, r3, r4, push #16 |
| 158 | mov r4, r4, pull #16 |
| 159 | orr r4, r4, r5, push #16 |
| 160 | mov r5, r5, pull #16 |
| 161 | orr r5, r5, r6, push #16 |
| 162 | mov r6, r6, pull #16 |
| 163 | orr r6, r6, r7, push #16 |
| 164 | stmia r0!, {r3 - r6} |
| 165 | bge 14b |
| 166 | PLD( cmn r2, #32 ) |
| 167 | PLD( bge 15b ) |
| 168 | PLD( add r2, r2, #32 ) |
| 169 | adds r2, r2, #12 |
| 170 | blt 17f |
| 171 | 16: mov r3, r7, pull #16 |
| 172 | ldr r7, [r1], #4 |
| 173 | subs r2, r2, #4 |
| 174 | orr r3, r3, r7, push #16 |
| 175 | str r3, [r0], #4 |
| 176 | bge 16b |
/* step r1 back 2 to the next unread source byte */
| 177 | 17: sub r1, r1, #2 |
| 178 | b 6b |
| 179 | |
/* 18: same again for src offset ip = 3 (shift 24/8) */
| 180 | 18: cmp r2, #12 |
| 181 | blt 21f |
| 182 | PLD( pld [r1, #12] ) |
| 183 | sub r2, r2, #12 |
| 184 | PLD( subs r2, r2, #32 ) |
| 185 | PLD( blt 20f ) |
| 186 | PLD( pld [r1, #28] ) |
| 187 | 19: PLD( pld [r1, #44] ) |
| 188 | 20: mov r3, r7, pull #24 |
| 189 | ldmia r1!, {r4 - r7} |
| 190 | subs r2, r2, #16 |
| 191 | orr r3, r3, r4, push #8 |
| 192 | mov r4, r4, pull #24 |
| 193 | orr r4, r4, r5, push #8 |
| 194 | mov r5, r5, pull #24 |
| 195 | orr r5, r5, r6, push #8 |
| 196 | mov r6, r6, pull #24 |
| 197 | orr r6, r6, r7, push #8 |
| 198 | stmia r0!, {r3 - r6} |
| 199 | bge 19b |
| 200 | PLD( cmn r2, #32 ) |
| 201 | PLD( bge 20b ) |
| 202 | PLD( add r2, r2, #32 ) |
| 203 | adds r2, r2, #12 |
| 204 | blt 22f |
| 205 | 21: mov r3, r7, pull #24 |
| 206 | ldr r7, [r1], #4 |
| 207 | subs r2, r2, #4 |
| 208 | orr r3, r3, r7, push #8 |
| 209 | str r3, [r0], #4 |
| 210 | bge 21b |
/* step r1 back 1 to the next unread source byte */
| 211 | 22: sub r1, r1, #1 |
| 212 | b 6b |
| 213 | |
| 214 | |
/*
 * ---- backward copy (src < dst) ----
 * 23: move both pointers to one past the end of their buffers; the rest
 * mirrors the forward path using descending load/store-multiple and
 * pre-decrement addressing.  r2 = n - 4; fewer than 4 bytes goes to the
 * byte tail at 29.
 */
| 215 | 23: add r1, r1, r2 |
| 216 | add r0, r0, r2 |
| 217 | subs r2, r2, #4 |
| 218 | blt 29f |
| 219 | PLD( pld [r1, #-4] ) |
/* dst end not word-aligned: 30 copies bytes until it is */
| 220 | ands ip, r0, #3 |
| 221 | bne 30f |
/* dst aligned but src not: 31 does the shifted copy, with ip = src & 3 */
| 222 | ands ip, r1, #3 |
| 223 | bne 31f |
| 224 | |
/*
 * 24: both pointers word-aligned; r2 = remaining - 4 on entry and
 * remaining - 32 after the two subs (under 12 left: 28, under 32 left: 27).
 */
| 225 | 24: subs r2, r2, #8 |
| 226 | blt 28f |
| 227 | subs r2, r2, #20 |
| 228 | blt 27f |
/* PLD-wrapped lines: extra bias of 64, so loop 25 needs >= 96 bytes left */
| 229 | PLD( pld [r1, #-32] ) |
| 230 | PLD( subs r2, r2, #64 ) |
| 231 | PLD( blt 26f ) |
/* 25: bulk loop, 32 bytes per group, one or two groups per pass */
| 232 | 25: PLD( pld [r1, #-64] ) |
| 233 | PLD( pld [r1, #-96] ) |
| 234 | ldmdb r1!, {r3 - r9, ip} |
| 235 | subs r2, r2, #32 |
| 236 | stmgedb r0!, {r3 - r9, ip} |
| 237 | ldmgedb r1!, {r3 - r9, ip} |
| 238 | subges r2, r2, #32 |
| 239 | stmdb r0!, {r3 - r9, ip} |
| 240 | bge 25b |
| 241 | 26: PLD( ldmdb r1!, {r3 - r9, ip} ) |
| 242 | PLD( adds r2, r2, #32 ) |
| 243 | PLD( stmgedb r0!, {r3 - r9, ip} ) |
| 244 | PLD( ldmgedb r1!, {r3 - r9, ip} ) |
| 245 | PLD( subges r2, r2, #32 ) |
| 246 | PLD( stmdb r0!, {r3 - r9, ip} ) |
/* 27: r2 = remaining - 32; copy four words if >= 16 left, then three if >= 12 */
| 247 | 27: cmn r2, #16 |
| 248 | ldmgedb r1!, {r3 - r6} |
| 249 | subge r2, r2, #16 |
| 250 | stmgedb r0!, {r3 - r6} |
| 251 | adds r2, r2, #20 |
| 252 | ldmgedb r1!, {r3 - r5} |
| 253 | subge r2, r2, #12 |
| 254 | stmgedb r0!, {r3 - r5} |
/* 28: r2 = remaining - 12; copy one word (4..7 left) or two (8..11 left) */
| 255 | 28: adds r2, r2, #8 |
| 256 | blt 29f |
| 257 | subs r2, r2, #4 |
| 258 | ldrlt r3, [r1, #-4]! |
| 259 | ldmgedb r1!, {r4, r5} |
| 260 | subge r2, r2, #4 |
| 261 | strlt r3, [r0, #-4]! |
| 262 | stmgedb r0!, {r4, r5} |
| 263 | |
/*
 * 29: byte tail.  The adds restores the true count (0..3); EXITEQ handles
 * the zero case, otherwise copy 1..3 bytes downwards and leave via EXIT.
 */
| 264 | 29: adds r2, r2, #4 |
| 265 | EXITEQ |
| 266 | cmp r2, #2 |
| 267 | ldrb r3, [r1, #-1]! |
| 268 | ldrgeb r4, [r1, #-1]! |
| 269 | ldrgtb r5, [r1, #-1]! |
| 270 | strb r3, [r0, #-1]! |
| 271 | strgeb r4, [r0, #-1]! |
| 272 | strgtb r5, [r0, #-1]! |
| 273 | EXIT |
| 274 | |
/*
 * 30: going downwards, ip = dst & 3 is already the number of bytes (1..3)
 * to copy before dst reaches a word boundary, so no rsb is needed.
 * Afterwards re-test the source alignment (aligned: 24, else fall into 31).
 */
| 275 | 30: cmp ip, #2 |
| 276 | ldrb r3, [r1, #-1]! |
| 277 | ldrgeb r4, [r1, #-1]! |
| 278 | ldrgtb r5, [r1, #-1]! |
| 279 | strb r3, [r0, #-1]! |
| 280 | strgeb r4, [r0, #-1]! |
| 281 | strgtb r5, [r0, #-1]! |
| 282 | subs r2, r2, ip |
| 283 | blt 29b |
| 284 | ands ip, r1, #3 |
| 285 | beq 24b |
| 286 | |
/*
 * 31: dst is word-aligned, src end is ip (1..3) bytes above a word
 * boundary.  Round r1 down and load the containing word into r3 without
 * moving r1.  Output words are merged from neighbouring source words
 * with push/pull shifts.  ip = 3 is handled here, ip = 2 at 36 and
 * ip = 1 at 41.
 */
| 287 | 31: bic r1, r1, #3 |
| 288 | ldr r3, [r1], #0 |
| 289 | cmp ip, #2 |
| 290 | blt 41f |
| 291 | beq 36f |
/* r2 = remaining - 4: fewer than 16 bytes left uses the word loop at 34 */
| 292 | cmp r2, #12 |
| 293 | blt 34f |
| 294 | PLD( pld [r1, #-16] ) |
| 295 | sub r2, r2, #12 |
| 296 | PLD( subs r2, r2, #32 ) |
| 297 | PLD( blt 33f ) |
| 298 | PLD( pld [r1, #-32] ) |
/* 32/33: 16 bytes per pass; r3 carries the leftover bytes between passes */
| 299 | 32: PLD( pld [r1, #-48] ) |
| 300 | 33: mov r7, r3, push #8 |
| 301 | ldmdb r1!, {r3, r4, r5, r6} |
| 302 | subs r2, r2, #16 |
| 303 | orr r7, r7, r6, pull #24 |
| 304 | mov r6, r6, push #8 |
| 305 | orr r6, r6, r5, pull #24 |
| 306 | mov r5, r5, push #8 |
| 307 | orr r5, r5, r4, pull #24 |
| 308 | mov r4, r4, push #8 |
| 309 | orr r4, r4, r3, pull #24 |
| 310 | stmdb r0!, {r4, r5, r6, r7} |
| 311 | bge 32b |
| 312 | PLD( cmn r2, #32 ) |
| 313 | PLD( bge 33b ) |
| 314 | PLD( add r2, r2, #32 ) |
| 315 | adds r2, r2, #12 |
| 316 | blt 35f |
| 317 | 34: mov ip, r3, push #8 |
| 318 | ldr r3, [r1, #-4]! |
| 319 | subs r2, r2, #4 |
| 320 | orr ip, ip, r3, pull #24 |
| 321 | str ip, [r0, #-4]! |
| 322 | bge 34b |
/*
 * r1 sits on the word boundary below the unread bytes; add 3 so it is
 * again one past the last unread source byte for the byte tail.
 */
| 323 | 35: add r1, r1, #3 |
| 324 | b 29b |
| 325 | |
/* 36: same as the block above for src offset ip = 2 (shift 16/16) */
| 326 | 36: cmp r2, #12 |
| 327 | blt 39f |
| 328 | PLD( pld [r1, #-16] ) |
| 329 | sub r2, r2, #12 |
| 330 | PLD( subs r2, r2, #32 ) |
| 331 | PLD( blt 38f ) |
| 332 | PLD( pld [r1, #-32] ) |
| 333 | 37: PLD( pld [r1, #-48] ) |
| 334 | 38: mov r7, r3, push #16 |
| 335 | ldmdb r1!, {r3, r4, r5, r6} |
| 336 | subs r2, r2, #16 |
| 337 | orr r7, r7, r6, pull #16 |
| 338 | mov r6, r6, push #16 |
| 339 | orr r6, r6, r5, pull #16 |
| 340 | mov r5, r5, push #16 |
| 341 | orr r5, r5, r4, pull #16 |
| 342 | mov r4, r4, push #16 |
| 343 | orr r4, r4, r3, pull #16 |
| 344 | stmdb r0!, {r4, r5, r6, r7} |
| 345 | bge 37b |
| 346 | PLD( cmn r2, #32 ) |
| 347 | PLD( bge 38b ) |
| 348 | PLD( add r2, r2, #32 ) |
| 349 | adds r2, r2, #12 |
| 350 | blt 40f |
| 351 | 39: mov ip, r3, push #16 |
| 352 | ldr r3, [r1, #-4]! |
| 353 | subs r2, r2, #4 |
| 354 | orr ip, ip, r3, pull #16 |
| 355 | str ip, [r0, #-4]! |
| 356 | bge 39b |
/* add 2 so r1 is one past the last unread source byte */
| 357 | 40: add r1, r1, #2 |
| 358 | b 29b |
| 359 | |
/* 41: same again for src offset ip = 1 (shift 24/8) */
| 360 | 41: cmp r2, #12 |
| 361 | blt 44f |
| 362 | PLD( pld [r1, #-16] ) |
| 363 | sub r2, r2, #12 |
| 364 | PLD( subs r2, r2, #32 ) |
| 365 | PLD( blt 43f ) |
| 366 | PLD( pld [r1, #-32] ) |
| 367 | 42: PLD( pld [r1, #-48] ) |
| 368 | 43: mov r7, r3, push #24 |
| 369 | ldmdb r1!, {r3, r4, r5, r6} |
| 370 | subs r2, r2, #16 |
| 371 | orr r7, r7, r6, pull #8 |
| 372 | mov r6, r6, push #24 |
| 373 | orr r6, r6, r5, pull #8 |
| 374 | mov r5, r5, push #24 |
| 375 | orr r5, r5, r4, pull #8 |
| 376 | mov r4, r4, push #24 |
| 377 | orr r4, r4, r3, pull #8 |
| 378 | stmdb r0!, {r4, r5, r6, r7} |
| 379 | bge 42b |
| 380 | PLD( cmn r2, #32 ) |
| 381 | PLD( bge 43b ) |
| 382 | PLD( add r2, r2, #32 ) |
| 383 | adds r2, r2, #12 |
| 384 | blt 45f |
| 385 | 44: mov ip, r3, push #24 |
| 386 | ldr r3, [r1, #-4]! |
| 387 | subs r2, r2, #4 |
| 388 | orr ip, ip, r3, pull #8 |
| 389 | str ip, [r0, #-4]! |
| 390 | bge 44b |
/* add 1 so r1 is one past the last unread source byte */
| 391 | 45: add r1, r1, #1 |
| 392 | b 29b |
| 393 | |