blob: a4ee97b5a2bfcfd3099353b44e8678c4df5bff6b [file] [log] [blame]
/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * ASM optimised string functions
 */
12#include <linux/linkage.h>
13#include <asm/assembler.h>
Lin Yongtingc2459d352014-11-16 03:14:58 +010014#include <asm/unwind.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015
16 .text
17 .align 5
Linus Torvalds1da177e2005-04-16 15:20:36 -070018
/*
 * void *memset(void *s, int c, size_t n)
 *
 * In:   r0 = s (destination), r1 = c (fill value), r2 = n (byte count)
 * Out:  r0 = s, never written (every store goes through ip)
 * Uses: r1, r2, r3, ip and the flags as scratch.  r8 and lr (plus r4-r7
 *       in the CALGN variant) are pushed before the bulk loop and popped
 *       again before returning.
 *
 * Only the low 8 bits of c are used: the value is masked on entry so that
 * the byte replication below cannot be polluted by higher bits of r1.
 */
ENTRY(memset)
UNWIND( .fnstart )
	and	r1, r1, #255		@ c is converted to unsigned char
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ replicate the fill byte into
	orr	r1, r1, r1, lsl #16	@ all four bytes of r1
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ < 16 bytes: no bulk stores needed

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 * (subtracting 64 leaves bits 0-5 of r2 untouched).
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 2 x 16 = 32 bytes
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f			@ count <= 96 or ip already 32-byte aligned

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4, N = bit 3, bit 30 = bit 2 of r8
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)
	mov	r8, r1			@ r8 holds the fill pattern again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 = 64 bytes at a time
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ nothing left: restore and return

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )

#endif

UNWIND( .fnstart )
4:	tst	r2, #8
	stmneia	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	ret	lr

/*
 * Unaligned destination: r3 = r0 & 3 (1, 2 or 3).  Store 4 - r3 single
 * bytes to reach a word boundary, then rejoin the aligned path at 1.
 * If fewer than 4 bytes were requested, go straight to the byte tail at
 * 5; the subtraction of 4 does not disturb bits 0-1 of r2 tested there.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend )
ENDPROC(memset)