blob: a9eaa0e8307f84c852791e9e3a7b549b60f2c85b [file] [log] [blame]
/***************************************************************************
 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 ***************************************************************************/
7
/*
 * memset32_neon -- store a 32-bit value into consecutive words of a buffer.
 * GNU as syntax, ARM (A32) state, NEON.
 *
 * In:    r0 = buffer  (destination; every store below is word-granular,
 *                      so word alignment is all that is required)
 *        r1 = value   (32-bit pattern to write)
 *        r2 = times to write (count of 32-bit words; 0 writes nothing)
 * Out:   no result is produced; r0 is left wherever the last
 *        post-incrementing store moved it.
 * Clobb: r0, r2, r12, q0, q1 (d0-d3), condition flags. No stack use.
 *
 * NOTE(review): presumably called from C under the AAPCS, where all of the
 * clobbered registers are caller-saved -- confirm against the caller's
 * prototype.
 *
 * Strategy:
 *   count == 1      single store
 *   count 0, 2..4   plain word loop
 *   count 5..15     computed jump into a ladder of 15 word stores
 *   count >= 16     32-byte NEON stores in chunks of 128, 64, 32 and 16
 *                   words, then the same ladder for the 0..15 word tail
 */
        .text
        .code   32
        .fpu    neon
        .align  4                       /* ARM gas: 2^4 = 16-byte boundary */
        .globl  memset32_neon
        /* Typed as a function so the linker can interwork calls coming
         * from Thumb code; an untyped symbol gets no such treatment. */
        .type   memset32_neon, %function
        .func   memset32_neon

memset32_neon:
        cmp     r2, #1                  /* exactly one word? */
        streq   r1, [r0], #4
        bxeq    lr

        cmp     r2, #4
        bgt     memset32_neon_start     /* 5 or more words */
        cmp     r2, #0
        bxeq    lr                      /* nothing to write */
memset32_neon_small:                    /* 2..4 words, one per iteration */
        str     r1, [r0], #4
        subs    r2, r2, #1
        bne     memset32_neon_small
        bx      lr

memset32_neon_start:
        cmp     r2, #16
        blt     memset32_dropthru       /* 5..15 words: scalar ladder only */
        vdup.32 q0, r1                  /* q0 = value x 4 */
        vmov    q1, q0                  /* q0:q1 = value x 8 (32 bytes) */
        cmp     r2, #32
        blt     memset32_16
        cmp     r2, #64
        blt     memset32_32
        cmp     r2, #128
        blt     memset32_64

        /*
         * All NEON stores use 32-bit elements: the bytes written are the
         * same as with 64-bit elements (every word is identical), but only
         * word alignment is demanded of the buffer even when the core
         * enforces strict alignment.
         */
memset32_128:
        movs    r12, r2, lsr #7         /* r12 = number of 128-word chunks */
memset32_loop128:
        subs    r12, r12, #1            /* flags survive the stores below */
        .rept   16                      /* 16 x 32 bytes = 128 words */
        vst1.32 {d0-d3}, [r0]!
        .endr
        bne     memset32_loop128
        ands    r2, r2, #0x7f           /* r2 = words still to write */
        bxeq    lr

memset32_64:
        movs    r12, r2, lsr #6         /* fewer than 64 words left? */
        beq     memset32_32
        .rept   8                       /* 8 x 32 bytes = 64 words */
        vst1.32 {d0-d3}, [r0]!
        .endr
        ands    r2, r2, #0x3f
        bxeq    lr

memset32_32:
        movs    r12, r2, lsr #5         /* fewer than 32 words left? */
        beq     memset32_16
        .rept   4                       /* 4 x 32 bytes = 32 words */
        vst1.32 {d0-d3}, [r0]!
        .endr
        ands    r2, r2, #0x1f
        bxeq    lr

memset32_16:
        movs    r12, r2, lsr #4         /* fewer than 16 words left? */
        beq     memset32_dropthru
        and     r2, r2, #0xf            /* tail length for the ladder */
        vst1.32 {d0-d3}, [r0]!          /* 2 x 32 bytes = 16 words */
        vst1.32 {d0-d3}, [r0]!

        /*
         * Tail: r2 = 0..15 words remaining at r0.
         * In ARM state pc reads as the current instruction + 8, i.e. the
         * first str after the nop. Adding (15 - r2) * 4 skips that many
         * 4-byte stores, so exactly r2 of them run (offsets (r2-1)*4 .. 0);
         * r2 == 0 lands directly on the bx lr.
         * Every instruction from the nop to the bx must stay 4 bytes wide
         * and in this order.
         */
memset32_dropthru:
        rsb     r2, r2, #15
        add     pc, pc, r2, lsl #2
        nop
        str     r1, [r0, #56]
        str     r1, [r0, #52]
        str     r1, [r0, #48]
        str     r1, [r0, #44]
        str     r1, [r0, #40]
        str     r1, [r0, #36]
        str     r1, [r0, #32]
        str     r1, [r0, #28]
        str     r1, [r0, #24]
        str     r1, [r0, #20]
        str     r1, [r0, #16]
        str     r1, [r0, #12]
        str     r1, [r0, #8]
        str     r1, [r0, #4]
        str     r1, [r0, #0]
        bx      lr
        .size   memset32_neon, . - memset32_neon

        .endfunc
        .end