blob: 57ace356c9490fbce556f82c6dd8a74e2fe8655a [file] [log] [blame]
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
20
21#include <asm/ppc_asm.h>
Anton Blanchardcf8fb552012-06-04 16:02:22 +000022#include <asm/asm-offsets.h>
Al Viro9445aa12016-01-13 23:33:46 -050023#include <asm/export.h>
Anton Blanchardcf8fb552012-06-04 16:02:22 +000024
/*
 * TOC entry holding the address of ppc64_caches.  The long clear path of
 * __clear_user loads it relative to r2 (PPC64_CACHES@toc) and then reads
 * the L1 data cache line size fields through that pointer.
 */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"
Anton Blanchard17968fb2012-05-27 19:54:03 +000029
/**
 * __clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
41
/*
 * err1 / err2 / err3: exception table annotations.
 *
 * Usage is "errN; <store instruction>".  Each macro places a numeric
 * local label at the current location (so it lands on the instruction
 * that follows the macro) and emits one __ex_table entry made of two
 * 64-bit words: the address of that instruction and the address of the
 * matching .Ldo_errN fixup label.  The table is 8 byte aligned (.align 3)
 * and .previous switches back to the section that was active before.
 *
 * Which fixup to use depends on the state of r3/r4/r8 at the store; see
 * the .Ldo_err1/.Ldo_err2/.Ldo_err3 labels.
 */
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm

	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm

	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm
65
/*
 * Fixup targets named in the exception table entries emitted by
 * err1/err2/err3.  The three labels fall through into one another.
 *
 * .Ldo_err1: used where r3 may have advanced without r4 being adjusted.
 *            Expects r8 = address the interrupted sequence started at and
 *            r4 = number of bytes outstanding counted from r8.  Rewinds
 *            r3 to r8 and continues as .Ldo_err2.
 * .Ldo_err2: expects r3 = next byte to clear and r4 = bytes outstanding
 *            from r3 (r4 must be non-zero, it is loaded into CTR).
 *            Retries one byte at a time so that r4 ends up exact.
 * .Ldo_err3: reached when a byte store of the retry loop is itself
 *            registered as faulting, or by fall-through once the retry
 *            loop has counted r4 down to zero.  Returns r4 in r3, i.e.
 *            the number of bytes that were not cleared.
 *
 * r0 is expected to still hold zero (set at the top of __clear_user).
 */
.Ldo_err1:
	mr	r3,r8			/* rewind to the saved restart address */

.Ldo_err2:
	mtctr	r4			/* at most r4 single byte stores */
1:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	addi	r4,r4,-1		/* keep r4 = bytes still not cleared */
	bdnz	1b

.Ldo_err3:
	mr	r3,r4			/* return value: bytes left uncleared */
	blr
80
/*
 * __clear_user(to = r3, n = r4): returns in r3 the number of bytes that
 * could not be cleared (0 on success).
 *
 * Register usage:
 *   r0   zero, the value that is stored
 *   r3   current destination pointer
 *   r4   bytes still to clear
 *   r5   pointer loaded from the PPC64_CACHES TOC entry, later the number
 *        of bytes up to the next cache line boundary
 *   r6   scratch: -to, alignment byte count, loop counts
 *   r7   shift count read from DCACHEL1LOGLINESIZE
 *   r8   restart address consumed by the .Ldo_err1 fixup
 *   r9   stride read from DCACHEL1LINESIZE
 *   r10  scratch: 4 * r9, then r9 - 1 used as a mask
 *   cr7  low four bits of -to (head) or of the residual count (tail)
 *   cr0, cr1, CTR are clobbered as well
 *
 * Stores marked err2 keep r3/r4 in step, so the fixup can resume from
 * them directly.  Stores marked err1 advance r3 without touching r4 and
 * therefore record their starting address in r8 first.
 *
 * The length compares are signed (cmpdi/cmpd); the caller is documented
 * to have validated the range with access_ok() beforehand.
 */
_GLOBAL_TOC(__clear_user)
	cmpdi	r4,32
	neg	r6,r3			/* low bits of -to = bytes up to alignment */
	li	r0,0
	blt	.Lshort_clear		/* under 32 bytes: no alignment pass */
	mr	r8,r3			/* restart address for the err1 stores */
	mtocrf	0x01,r6			/* cr7 = low four bits of -to */
	clrldi	r6,r6,(64-3)		/* r6 = bytes needed to reach 8 byte alignment */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,1f
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r4,r4,r6		/* account for the alignment bytes in one go */

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes: try the dcbz path */

.Lmedium_clear:
	srdi	r6,r4,5			/* number of whole 32 byte chunks (>= 1 here) */
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go */
6:	mr	r8,r3			/* restart address for the err1 stores below */
	clrldi	r4,r4,(64-4)		/* r4 = residual count, 0..15 */
	mtocrf	0x01,r4			/* cr7 bits select the 8/4/2/1 byte stores */
	bf	cr7*4+0,7f
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f
err1;	stb	r0,0(r3)

10:	li	r3,0			/* everything cleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)

	/* cr7 still holds the bits of -to: bit 0 set means 8 more bytes to 16 byte alignment */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cacheline aligned */
11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)
	lwz	r9,DCACHEL1LINESIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 *
	 * NOTE(review): the threshold actually applied below is four times
	 * r9 (sldi by 2); anything shorter is handed to the medium path.
	 */
	sldi	r10,r9,2
	cmpd	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = r9 - 1, used as a mask */
	and.	r5,r6,r10		/* r5 = bytes up to the next cache line */
	beq	13f			/* already cache line aligned */

	srdi	r6,r5,4			/* 16 byte steps up to the boundary */
	mtctr	r6
	mr	r8,r3			/* restart address; r4 is adjusted after the loop */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7		/* number of whole cache lines */
	mtctr	r6
	mr	r8,r3			/* restart address; r4 is adjusted after the loop */
14:
err1;	dcbz	r0,r3			/* first operand is RA = 0, so the address is r3 */
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10		/* bytes left over after the whole lines */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__clear_user)