| /*************************************************************************** |
| * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| ***************************************************************************/ |
| |
| .code 32 /* assemble the following code as 32-bit ARM (not Thumb) instructions */ |
| .fpu neon /* enable the NEON instructions (vdup/vmov/vst1) used by memset32_neon */ |
| .align 4 /* on ARM targets GAS takes a power of two here: 16-byte alignment */ |
| .globl memset32_neon /* make the entry point visible to the linker */ |
| |
| /* memset32_neon: store one 32-bit value into consecutive words of a buffer. |
| * |
| * In: r0 = buffer, r1 = value, r2 = times to write (count of 32-bit words) |
| * Scratch: r12, q0, q1 and the condition flags; r0 and r2 are modified. |
| * No stack is used and nothing is explicitly placed in a return register. |
| * |
| * Strategy: |
| * - count 0 returns at once, count 1 is a single store, 2..4 use a store loop; |
| * - counts 5..15 go straight to a computed jump into a 15-entry store ladder; |
| * - counts of 16 and up replicate the value into q0/q1 and peel off 128-, 64-, |
| * 32- and 16-word chunks with 32-byte NEON stores, then the same ladder |
| * writes the final 0..15 words. |
| * |
| * NOTE(review): the count compares are signed (bgt/blt), so a count with |
| * bit 31 set is not greater than 4 and takes the small store loop. |
| * NOTE(review): the vst1 stores carry no alignment qualifier; any alignment |
| * requirement on the buffer is not checked here - confirm against callers. |
| */ |
| memset32_neon: |
| cmp r2, #1 /* exactly one word? */ |
| streq r1, [r0], #4 /* yes: single store, r0 post-incremented by 4 */ |
| bxeq lr |
| /* more than 4 words takes the unrolled paths; 0 words returns immediately */ |
| cmp r2, #4 |
| bgt memset32_neon_start /* signed compare: r2 > 4 */ |
| cmp r2, #0 |
| bxeq lr /* nothing to write */ |
| memset32_neon_small: /* for non-negative counts this is the 2..4 word case */ |
| str r1, [r0], #4 /* one word per iteration */ |
| subs r2, r2, #1 |
| bne memset32_neon_small |
| bx lr |
| memset32_neon_start: /* r2 > 4 */ |
| cmp r2, #16 |
| blt memset32_dropthru /* 5..15 words: store ladder only, NEON registers not needed */ |
| vdup.32 q0, r1 /* q0 = value replicated into all four 32-bit lanes */ |
| vmov q1, q0 /* q1 = same pattern, so {q0, q1} is 8 words = 32 bytes per store */ |
| cmp r2, #32 /* dispatch to the largest chunk size that fits the count */ |
| blt memset32_16 /* 16..31 words */ |
| cmp r2, #64 |
| blt memset32_32 /* 32..63 words */ |
| cmp r2, #128 |
| blt memset32_64 /* 64..127 words */ |
| memset32_128: /* r2 >= 128 */ |
| movs r12, r2, lsr #7 /* r12 = number of whole 128-word chunks (at least 1 here) */ |
| memset32_loop128: |
| subs r12, r12, #1 /* sets Z for the bne below; the vst1 stores do not touch the flags */ |
| vst1.64 {q0, q1}, [r0]! /* 16 stores x 32 bytes = 512 bytes = 128 words per iteration */ |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| bne memset32_loop128 |
| ands r2, r2, #0x7f /* r2 = words left after the 128-word chunks (0..127) */ |
| bxeq lr /* count was a multiple of 128 */ |
| memset32_64: /* r2 = 1..127 here (64..127 when reached from the dispatch above) */ |
| movs r12, r2, lsr #6 /* zero when fewer than 64 words remain */ |
| beq memset32_32 |
| vst1.64 {q0, q1}, [r0]! /* 8 stores x 32 bytes = 64 words */ |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| ands r2, r2, #0x3f /* r2 = words left (0..63) */ |
| bxeq lr |
| memset32_32: /* r2 = 1..63 here */ |
| movs r12, r2, lsr #5 /* zero when fewer than 32 words remain */ |
| beq memset32_16 |
| vst1.64 {q0, q1}, [r0]! /* 4 stores x 32 bytes = 32 words */ |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| vst1.64 {q0, q1}, [r0]! |
| ands r2, r2, #0x1f /* r2 = words left (0..31) */ |
| bxeq lr |
| memset32_16: /* r2 = 1..31 here */ |
| movs r12, r2, lsr #4 /* zero when fewer than 16 words remain */ |
| beq memset32_dropthru |
| and r2, r2, #0xf /* keep the 0..15 word tail for the ladder below */ |
| vst1.64 {q0, q1}, [r0]! /* 2 stores x 32 bytes = 16 words */ |
| vst1.64 {q0, q1}, [r0]! |
| memset32_dropthru: /* r2 = 0..15 words still to write, starting at r0 */ |
| rsb r2, r2, #15 /* r2 = 15 - remaining = number of ladder stores to skip */ |
| add pc, pc, r2, lsl #2 /* computed jump: in ARM state pc reads as this instruction + 8 (the first str below); each skipped store is 4 bytes */ |
| nop /* filler in the +4 slot; never executed because the jump lands at +8 or later */ |
| str r1, [r0, #56] /* ladder entry for 15 remaining words; each later entry handles one word fewer */ |
| str r1, [r0, #52] |
| str r1, [r0, #48] |
| str r1, [r0, #44] |
| str r1, [r0, #40] |
| str r1, [r0, #36] |
| str r1, [r0, #32] |
| str r1, [r0, #28] |
| str r1, [r0, #24] |
| str r1, [r0, #20] |
| str r1, [r0, #16] |
| str r1, [r0, #12] |
| str r1, [r0, #8] |
| str r1, [r0, #4] |
| str r1, [r0, #0] /* ladder entry for 1 remaining word */ |
| bx lr /* also the jump target when 0 words remain */ |
| |
| .end |