blob: 7bd9549a90a23f0c82fa5688f1e2ebf8c3845e7f [file] [log] [blame]
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
20
21#include <asm/ppc_asm.h>
Anton Blanchardcf8fb552012-06-04 16:02:22 +000022#include <asm/asm-offsets.h>
23
/*
 * TOC entry holding the address of ppc64_caches.  It is loaded with
 * "ld rX,PPC64_CACHES@toc(r2)"; the fields behind it are then read
 * using offsets from <asm/asm-offsets.h>.
 */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches

	/* Everything that follows is code. */
	.section	".text"
Anton Blanchard17968fb2012-05-27 19:54:03 +000028
/**
 * __clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
40
/*
 * err1 - tag the instruction that follows on the same line
 * ("err1; stb r0,0(r3)") with an __ex_table entry.
 *
 * The local label 100 marks the address of that instruction and the
 * table entry pairs it with .Ldo_err1.  The entry is 8-byte aligned
 * and made of two 8-byte words: tagged address, then fixup address.
 * .previous switches back to the section the macro was invoked from.
 */
	.macro	err1
100:
	.section	__ex_table,"a"
	.p2align	3
	.llong		100b,.Ldo_err1
	.previous
	.endm
48
/*
 * err2 - tag the instruction that follows on the same line
 * ("err2; std r0,0(r3)") with an __ex_table entry.
 *
 * The local label 200 marks the address of that instruction and the
 * table entry pairs it with .Ldo_err2.  The entry is 8-byte aligned
 * and made of two 8-byte words: tagged address, then fixup address.
 * .previous switches back to the section the macro was invoked from.
 */
	.macro	err2
200:
	.section	__ex_table,"a"
	.p2align	3
	.llong		200b,.Ldo_err2
	.previous
	.endm
56
/*
 * err3 - tag the instruction that follows on the same line
 * ("err3; stb r0,0(r3)") with an __ex_table entry.
 *
 * The local label 300 marks the address of that instruction and the
 * table entry pairs it with .Ldo_err3.  The entry is 8-byte aligned
 * and made of two 8-byte words: tagged address, then fixup address.
 * .previous switches back to the section the macro was invoked from.
 */
	.macro	err3
300:
	.section	__ex_table,"a"
	.p2align	3
	.llong		300b,.Ldo_err3
	.previous
	.endm
64
/*
 * Fixup targets named by the __ex_table entries that the err1/err2/err3
 * macros emit.  The three labels fall through into one another:
 *
 *   .Ldo_err1  r3 is not trustworthy; reload it from r8, the pointer
 *              saved at the start of the region being cleared.  r4 still
 *              holds the byte count as of that same point.
 *   .Ldo_err2  r3/r4 are accurate.  Retry the remaining r4 bytes one
 *              byte at a time so that as much as possible gets cleared.
 *   .Ldo_err3  a byte store of the retry loop was the tagged instruction;
 *              r4 is the number of bytes left uncleared, return it.
 *
 * r0 is expected to still hold zero (the value being stored).
 */
.Ldo_err1:
	mr	r3,r8			/* restart from the saved pointer */

.Ldo_err2:
	mtctr	r4			/* one iteration per remaining byte */
.Lfixup_byte_loop:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r4,r4,1			/* one byte fewer outstanding */
	bdnz	.Lfixup_byte_loop
	/* Loop ran to completion: r4 is now 0, fall through and return it. */

.Ldo_err3:
	mr	r3,r4			/* return value = bytes not cleared */
	blr
79
/*
 * __clear_user(to = r3, n = r4): returns in r3 the number of bytes that
 * could not be cleared (0 on success).
 *
 * Register use inside the routine:
 *   r0   constant zero, the value that is stored
 *   r3   current destination pointer
 *   r4   bytes still to clear
 *   r5   address of ppc64_caches, later the number of bytes needed to
 *        reach the next cacheline boundary
 *   r6   scratch: -to, alignment byte count, loop trip counts
 *   r7   DCACHEL1LOGLINESIZE field of ppc64_caches (used as a shift count)
 *   r8   destination pointer at the start of the current "err1" region;
 *        .Ldo_err1 reloads r3 from it
 *   r9   DCACHEL1LINESIZE field of ppc64_caches
 *   r10  4 * r9 (size threshold), then r9 - 1 (cacheline offset mask)
 *   cr7  low 4 bits of -to during alignment and in .Llong_clear,
 *        low 4 bits of the remaining count in the final tail
 *   ctr  loop counter
 *
 * Fault handling contract: stores tagged err2 require r3/r4 to be exact at
 * the time of the store.  Stores tagged err1 are used where r3 advances
 * without r4 being updated; for those, r8 must hold the value r3 had when
 * r4 was last exact.
 */
_GLOBAL_TOC(__clear_user)
	cmpdi	r4,32
	neg	r6,r3			/* r6 = -to */
	li	r0,0
	blt	.Lshort_clear		/* n < 32: no alignment work */
	mr	r8,r3			/* err1 restart point */
	mtocrf	0x01,r6			/* cr7 = low 4 bits of -to */
	clrldi	r6,r6,(64-3)		/* r6 = (-to) & 7 = bytes to 8-byte alignment */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,1f		/* 1s bit clear: already 2-byte aligned */
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f		/* 2s bit clear: already 4-byte aligned */
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f		/* 4s bit clear: already 8-byte aligned */
err1;	stw	r0,0(r3)
	addi	r3,r3,4

	/* r4 is only brought up to date once all alignment stores are done. */
3:	sub	r4,r4,r6

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes: try dcbz */

	/* Every entry to this label has r4 >= 32, so ctr is at least 1. */
.Lmedium_clear:
	srdi	r6,r4,5			/* number of 32-byte chunks */
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/*
	 * Up to 15 bytes to go.  r4 is reduced to its low 4 bits and copied
	 * into cr7; each set bit selects one store of that size.  r4 is not
	 * decremented below, hence the err1 tags and the r8 restart point.
	 */
6:	mr	r8,r3
	clrldi	r4,r4,(64-4)
	mtocrf	0x01,r4
	bf	cr7*4+0,7f		/* 8s bit */
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f		/* 4s bit */
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f		/* 2s bit */
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f		/* 1s bit */
err1;	stb	r0,0(r3)

10:	li	r3,0			/* everything cleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = &ppc64_caches */

	/*
	 * cr7 still holds the low 4 bits of -to from the alignment stage.
	 * The destination is 8-byte aligned here; if the 8s bit is set one
	 * more doubleword is needed to reach 16-byte alignment.
	 */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cacheline aligned */
11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)
	lwz	r9,DCACHEL1LINESIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 */
	sldi	r10,r9,2		/* r10 = 4 * line size */
	cmpd	r4,r10
	blt	.Lmedium_clear		/* too small to be worth dcbz */

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = line size - 1 */
	and.	r5,r6,r10		/* r5 = bytes to the next cacheline boundary */
	beq	13f			/* already cacheline aligned */

	srdi	r6,r5,4			/* 16-byte steps up to the boundary */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 updated after the loop */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7		/* number of whole cachelines */
	mtctr	r6
	mr	r8,r3			/* err1 restart point; r4 updated after the loop */
14:
	/*
	 * RA is written as 0, not r0: in the RA field of dcbz the value 0
	 * means "no base register" (literal zero), so the address is just r3.
	 */
err1;	dcbz	0,r3
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10		/* bytes left after the last whole cacheline */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear