/***************************************************************************
 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 ***************************************************************************/

/* Assembler setup: GNU as syntax, ARM (A32) instruction set, NEON enabled. */
        .text
        .arm                            /* same as ".code 32"; the computed jump below needs 4-byte instructions */
        .fpu    neon
        .p2align 4                      /* 2^4 = 16-byte alignment of the entry point */
        .globl  memset32_neon
        /*
         * ELF only: type the symbol as a function.  Linkers are only required
         * to provide ARM/Thumb interworking for STT_FUNC symbols, so an
         * untyped entry point may not be callable from Thumb code.
         */
        .type   memset32_neon, %function

/*
 * memset32_neon - fill a buffer with a repeated 32-bit value.
 *
 * Presumably declared in C as
 *     void memset32_neon(uint32_t *buffer, uint32_t value, int count);
 * (confirm against the caller's prototype).
 *
 * In:     r0 = buffer  (destination; only 32-bit alignment is relied upon)
 *         r1 = value   (32-bit pattern to store)
 *         r2 = count   (number of 32-bit words to write, treated as signed)
 * Out:    nothing
 * Clobb:  r0, r2, r12, q0, q1, condition flags
 *
 * Strategy:
 *   count <= 0   return without touching memory
 *   count <= 4   simple one-word-per-iteration loop
 *   count >= 16  NEON stores of 32 bytes (8 words) each, in chunks of
 *                128, 64, 32 and 16 words
 *   remainder    0..15 words through a computed jump into a store table
 */
memset32_neon:
        cmp     r2, #0
        bxle    lr                      /* nothing to do; also stops a negative count
                                           from spinning the word loop until wrap-around */

        cmp     r2, #4
        bgt     .Lmemset32_start

        /* 1..4 words: plain word loop. */
.Lmemset32_small:
        str     r1, [r0], #4
        subs    r2, r2, #1
        bne     .Lmemset32_small
        bx      lr

.Lmemset32_start:
        cmp     r2, #16
        blt     .Lmemset32_tail         /* 5..15 words: store table only, no NEON setup */

        vdup.32 q0, r1                  /* q0 = q1 = value replicated in every 32-bit lane */
        vmov    q1, q0

        /* Enter the chunk cascade at the largest chunk that fits. */
        cmp     r2, #32
        blt     .Lmemset32_16
        cmp     r2, #64
        blt     .Lmemset32_32
        cmp     r2, #128
        blt     .Lmemset32_64

        /* count >= 128: r12 = number of 128-word (512-byte) chunks, always >= 1. */
        mov     r12, r2, lsr #7
.Lmemset32_loop128:
        subs    r12, r12, #1            /* sets Z for the bne below; NEON stores leave flags alone */
        /*
         * 16 x 32 bytes = 128 words.  Element size .32 matches the 4-byte
         * alignment of a word buffer; because every lane holds the same value
         * the bytes written are the same as with a wider element size.
         */
        .rept   16
        vst1.32 {q0, q1}, [r0]!
        .endr
        bne     .Lmemset32_loop128
        ands    r2, r2, #0x7f           /* r2 = words still to write (0..127) */
        bxeq    lr

        /* Here 1 <= r2 <= 127. */
.Lmemset32_64:
        cmp     r2, #64
        blt     .Lmemset32_32
        .rept   8                       /* 8 x 32 bytes = 64 words */
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x3f
        bxeq    lr

        /* Here 1 <= r2 <= 63. */
.Lmemset32_32:
        cmp     r2, #32
        blt     .Lmemset32_16
        .rept   4                       /* 4 x 32 bytes = 32 words */
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x1f
        bxeq    lr

        /* Here 1 <= r2 <= 31. */
.Lmemset32_16:
        cmp     r2, #16
        blt     .Lmemset32_tail
        and     r2, r2, #0xf
        .rept   2                       /* 2 x 32 bytes = 16 words */
        vst1.32 {q0, q1}, [r0]!
        .endr

        /*
         * Tail: 0 <= r2 <= 15 words remain at r0.
         *
         * In ARM state, reading pc yields the address of the current
         * instruction + 8, i.e. the first "str" of the table (the nop fills
         * the skipped slot).  Adding (15 - r2) * 4 skips that many table
         * entries, so exactly r2 stores execute, covering offsets
         * 4*(r2-1) down to 0.  With r2 == 0 the jump lands on "bx lr".
         *
         * The table must stay exactly 15 four-byte instructions, directly
         * after the nop and directly followed by the return.
         */
.Lmemset32_tail:
        rsb     r2, r2, #15
        add     pc, pc, r2, lsl #2
        nop
        str     r1, [r0, #56]
        str     r1, [r0, #52]
        str     r1, [r0, #48]
        str     r1, [r0, #44]
        str     r1, [r0, #40]
        str     r1, [r0, #36]
        str     r1, [r0, #32]
        str     r1, [r0, #28]
        str     r1, [r0, #24]
        str     r1, [r0, #20]
        str     r1, [r0, #16]
        str     r1, [r0, #12]
        str     r1, [r0, #8]
        str     r1, [r0, #4]
        str     r1, [r0, #0]
        bx      lr

        .size   memset32_neon, . - memset32_neon

        .end