blob: 55a35263cbe37eef9ba7d9c99d83508b6dd4903b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/* memset.S: optimised assembly memset
2 *
3 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13 .text
14 .p2align 4
15
16###############################################################################
17#
18# void *memset(void *p, char ch, size_t count)
19#
20# - NOTE: must not use any stack. exception detection performs function return
21# to caller's fixup routine, aborting the remainder of the set
22# GR4, GR7, GR8, and GR11 must be managed
23#
24###############################################################################
25 .globl memset,__memset_end
26 .type memset,@function
27memset:
	# Fill `count` bytes at `p` with the low byte of `ch`.
	#
	# Register use, as far as this routine itself shows:
	#   GR8       - destination pointer on entry; never written here
	#   GR9       - fill value on entry; masked to its low 8 bits
	#   GR10      - byte count on entry; never written here
	#   GR4       - running store address
	#   GR5       - bytes still to be written
	#   GR6       - chunk size subtracted from GR5 (GR7 is used for this
	#               instead in the 8/4/2-byte tail steps)
	#   GR7       - index register for the update-form stores
	#   GR12/GR13 - fill pattern widened to 2, then 4, then 8 bytes
	#   ICC3      - flags of the running count; each "beqlr icc3" returns
	#               as soon as the count has reached zero
	#   CC7       - predicate guarding each group of conditional stores
	#
	# A ".p" suffix packs the instruction with the one that follows it, so
	# the instruction order and pairing below are significant.
	#
	# NOTE(review): GR7 is first written at the "setlos #8,gr7" ahead of the
	# 64-byte loop.  During the three alignment stores it still holds
	# whatever the caller left there, which matters to any fault fixup that
	# computes GR4+GR7 - confirm callers account for this.
28 orcc.p gr10,gr0,gr5,icc3 ; GR5 = count
29 andi gr9,#0xff,gr9
30 or.p gr8,gr0,gr4 ; GR4 = address
	# nothing to do for a zero count
31 beqlr icc3,#0
32
33 # conditionally write a byte to 2b-align the address
	# CC7 is true only when address bit 0 is set; the store, the count
	# decrement and the address increment are all predicated on it, so
	# ICC3 keeps its earlier (non-zero) state when the step is skipped
34 setlos.p #1,gr6
35 andicc gr4,#1,gr0,icc0
36 ckne icc0,cc7
37 cstb.p gr9,@(gr4,gr0) ,cc7,#1
38 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
39 cadd.p gr4,gr6,gr4 ,cc7,#1
40 beqlr icc3,#0
41
42 # conditionally write a word to 4b-align the address
	# the store here is csth, i.e. 2 bytes.  It is performed only when
	# address bit 1 is set (CC7) AND at least 2 bytes remain (CC5, from
	# the count-2 comparison); andcr merges the two predicates into CC7.
	# GR12 is built as the fill byte repeated twice.
43 andicc.p gr4,#2,gr0,icc0
44 subicc gr5,#2,gr0,icc1
45 setlos.p #2,gr6
46 ckne icc0,cc7
47 slli.p gr9,#8,gr12 ; need to double up the pattern
48 cknc icc1,cc5
49 or.p gr9,gr12,gr12
50 andcr cc7,cc5,cc7
51
52 csth.p gr12,@(gr4,gr0) ,cc7,#1
53 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
54 cadd.p gr4,gr6,gr4 ,cc7,#1
55 beqlr icc3,#0
56
57 # conditionally write a dword to 8b-align the address
	# the store here is cst, i.e. 4 bytes.  Same scheme as above: address
	# bit 2 must be set and at least 4 bytes must remain.  GR12 becomes
	# the fill byte repeated four times.
58 andicc.p gr4,#4,gr0,icc0
59 subicc gr5,#4,gr0,icc1
60 setlos.p #4,gr6
61 ckne icc0,cc7
62 slli.p gr12,#16,gr13 ; need to quadruple-up the pattern
63 cknc icc1,cc5
64 or.p gr13,gr12,gr12
65 andcr cc7,cc5,cc7
66
67 cst.p gr12,@(gr4,gr0) ,cc7,#1
68 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
69 cadd.p gr4,gr6,gr4 ,cc7,#1
70 beqlr icc3,#0
71
	# copy GR12 into GR13 so the GR12/GR13 pair holds the 8-byte pattern
	# used by the cstdu stores below
72 or.p gr12,gr12,gr13 ; need to octuple-up the pattern
73
74 # the address is now 8b-aligned - loop around writing 64b chunks
	# GR7 = 8 is the index for every cstdu.  GR4 is biased back by 8 here,
	# presumably because each update-form store writes at GR4+GR7 and
	# leaves that address in GR4 - so between stores GR4 trails the next
	# unwritten byte by 8.
	# ICC0 holds the count-64 comparison: primed once before the loop and
	# refreshed inside it after the count has been reduced.
75 setlos #8,gr7
76 subi.p gr4,#8,gr4 ; store with update index does weird stuff
77 setlos #64,gr6
78
79 subicc gr5,#64,gr0,icc0
	# loop head - presumably local label 0, the target of the "0b" branch
	# at the bottom; CC7 is true while at least 64 bytes remain
800: cknc icc0,cc7
81 cstdu gr12,@(gr4,gr7) ,cc7,#1
82 cstdu gr12,@(gr4,gr7) ,cc7,#1
83 cstdu gr12,@(gr4,gr7) ,cc7,#1
84 cstdu gr12,@(gr4,gr7) ,cc7,#1
85 cstdu gr12,@(gr4,gr7) ,cc7,#1
86 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
87 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
88 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
89 subicc gr5,#64,gr0,icc0
90 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
91 beqlr icc3,#0
	# go round again while another full 64 bytes remain
92 bnc icc0,#2,0b
93
94 # now do 32-byte remnant
	# four 8-byte stores if at least 32 bytes remain; GR6 is reloaded with
	# 16 and ICC0 with the count-16 comparison for the next step while the
	# stores are being issued
95 subicc.p gr5,#32,gr0,icc0
96 setlos #32,gr6
97 cknc icc0,cc7
98 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
99 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
100 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
101 setlos #16,gr6
102 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
103 subicc gr5,#16,gr0,icc0
104 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
105 beqlr icc3,#0
106
107 # now do 16-byte remnant
	# uses the ICC0 and GR6 (=16) values prepared in the previous step
108 cknc icc0,cc7
109 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
110 csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
111 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
112 beqlr icc3,#0
113
114 # now do 8-byte remnant
	# GR7 (still 8) serves as both store index and decrement; it is then
	# reloaded with 4 for the word store that follows
115 subicc gr5,#8,gr0,icc1
116 cknc icc1,cc7
117 cstdu.p gr12,@(gr4,gr7) ,cc7,#1
118 csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
119 setlos.p #4,gr7
120 beqlr icc3,#0
121
122 # now do 4-byte remnant
	# GR4 is advanced by 4 so that GR4+GR7 (GR7 = 4) again addresses the
	# next unwritten byte; ICC1 is loaded with the count-2 comparison for
	# the halfword step
123 subicc gr5,#4,gr0,icc0
124 addi.p gr4,#4,gr4
125 cknc icc0,cc7
126 cstu.p gr12,@(gr4,gr7) ,cc7,#1
127 csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
128 subicc.p gr5,#2,gr0,icc1
129 beqlr icc3,#0
130
131 # now do 2-byte remnant
	# same re-biasing with GR7 = 2; ICC0 is loaded with the count-1
	# comparison for the final byte step
132 setlos #2,gr7
133 addi.p gr4,#2,gr4
134 cknc icc1,cc7
135 csthu.p gr12,@(gr4,gr7) ,cc7,#1
136 csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
137 subicc.p gr5,#1,gr0,icc0
138 beqlr icc3,#0
139
140 # now do 1-byte remnant
	# GR7 = 0 and GR4 is advanced by 2, so GR4 itself is now the address
	# of the last byte; the store is packed with the unconditional return
141 setlos #0,gr7
142 addi.p gr4,#2,gr4
143 cknc icc0,cc7
144 cstb.p gr12,@(gr4,gr0) ,cc7,#1
145 bralr
	# end-of-routine marker; also used for the .size computation below
146__memset_end:
147
148 .size memset, __memset_end-memset
149
150###############################################################################
151#
152# clear memory in userspace
153# - return the number of bytes that could not be cleared (0 on complete success)
154#
155# long __memset_user(void *p, size_t count)
156#
157###############################################################################
# Register contract between __memset_user, memset and the fault fixup:
#   GR8  - in: p;     out: number of bytes left uncleared (0 on success)
#   GR9  - in: count; rewritten to 0 (the fill byte handed to memset)
#   GR10 - count, as memset's third argument; memset leaves it untouched
#   GR11 - our own return address (the call below replaces LR)
#   GR4  - memset's running store address
#   GR7  - memset's store index; GR4+GR7 is the address being stored to
	.globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
	.type		__memset_user,@function
__memset_user:
	movsg		lr,gr11			; keep our return address in GR11

	# memset does not load GR7 until it reaches its 8-byte store section,
	# yet the error handler below always computes the faulting address as
	# GR4+GR7.  Its byte/halfword/word alignment stores address memory at
	# GR4 alone, so GR7 must read as zero if one of those faults; without
	# this the handler would add stale caller data to GR4 and report a
	# bogus uncleared length.
	setlos		#0,gr7

	# abuse memset to do the dirty work: memset(p, 0, count)
	or.p		gr9,gr9,gr10		; GR10 = count
	setlos		#0,gr9			; GR9  = 0 (fill byte)
	call		memset

	# normal return point from memset - everything was cleared
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8			; return 0

	# deal with any exception generated by memset
	# (per memset's header note, a fault inside it is turned into a return
	#  to the caller's fixup routine; the code that selects this entry
	#  point lives outside this file)
	#	GR4  - memset's address tracking pointer
	#	GR7  - memset's step value (index register for store insns)
	#	GR8  - memset's original start address
	#	GR10 - memset's original count
__memset_user_error_handler:
	add.p		gr4,gr7,gr4		; GR4 = address of the store that faulted
	add		gr8,gr10,gr8		; GR8 = one past the end of the region
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8		; we return the amount left uncleared

	.size		__memset_user, .-__memset_user