blob: 4321fa02e18df07368689469049cad31b87eca04 [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001// SPDX-License-Identifier: GPL-2.0
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * MMX 3DNow! library helper functions
4 *
5 * To do:
Ingo Molnarca5d3f12008-02-18 08:53:56 +01006 * We can use MMX just for prefetch in IRQ's. This may be a win.
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * (reported so on K6-III)
8 * We should use a better code neutral filler for the short jump
9 * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
10 * We also want to clobber the filler register so we don't get any
Ingo Molnarca5d3f12008-02-18 08:53:56 +010011 * register forwarding stalls on the filler.
Linus Torvalds1da177e2005-04-16 15:20:36 -070012 *
13 * Add *user handling. Checksums are not a win with MMX on any CPU
14 * tested so far for any MMX solution figured.
15 *
Ingo Molnarca5d3f12008-02-18 08:53:56 +010016 * 22/09/2000 - Arjan van de Ven
17 * Improved for non-egineering-sample Athlons
Linus Torvalds1da177e2005-04-16 15:20:36 -070018 *
19 */
Ingo Molnarca5d3f12008-02-18 08:53:56 +010020#include <linux/hardirq.h>
21#include <linux/string.h>
Paul Gortmakere6830142016-07-13 20:18:57 -040022#include <linux/export.h>
Ingo Molnarca5d3f12008-02-18 08:53:56 +010023#include <linux/sched.h>
24#include <linux/types.h>
25
Ingo Molnardf6b35f2015-04-24 02:46:00 +020026#include <asm/fpu/api.h>
Ingo Molnarca5d3f12008-02-18 08:53:56 +010027#include <asm/asm.h>
28
/*
 * MMX-optimized memcpy(): copy @len bytes from @from to @to in 64-byte
 * blocks through the MMX registers.  Falls back to plain __memcpy() in
 * interrupt context, because kernel_fpu_begin() may not be used there.
 *
 * Returns the destination pointer @to, like memcpy().
 */
void *_mmx_memcpy(void *to, const void *from, size_t len)
{
	void *p;
	int i;

	/* Taking over the FPU/MMX state is not allowed in IRQ context: */
	if (unlikely(in_interrupt()))
		return __memcpy(to, from, len);

	p = to;
	i = len >> 6; /* len/64 */

	kernel_fpu_begin();

	/*
	 * Warm the cache with the first five 64-byte lines of the source.
	 * If the 3DNow! prefetch instruction faults, the .fixup code
	 * (reached via the exception table) patches the instruction at
	 * label 1 into a short jump over the rest of the prefetch set
	 * (0x1AEB == "jmp +26", little-endian), so it can fault only once.
	 */
	__asm__ __volatile__ (
		"1: prefetch (%0)\n"		/* This set is 28 bytes */
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from));

	/*
	 * Main loop: copy 64 bytes per iteration while prefetching 320
	 * bytes ahead.  Stop five blocks early (i > 5) so that the
	 * lookahead prefetch does not run past the end of the source.
	 * The fixup patches a faulting prefetch into "jmp +5" (0x05EB),
	 * skipping just that one instruction on subsequent iterations.
	 */
	for ( ; i > 5; i--) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	/* Remaining full 64-byte blocks, without the lookahead prefetch: */
	for ( ; i > 0; i--) {
		__asm__ __volatile__ (
		"  movq (%0), %%mm0\n"
		"  movq 8(%0), %%mm1\n"
		"  movq 16(%0), %%mm2\n"
		"  movq 24(%0), %%mm3\n"
		"  movq %%mm0, (%1)\n"
		"  movq %%mm1, 8(%1)\n"
		"  movq %%mm2, 16(%1)\n"
		"  movq %%mm3, 24(%1)\n"
		"  movq 32(%0), %%mm0\n"
		"  movq 40(%0), %%mm1\n"
		"  movq 48(%0), %%mm2\n"
		"  movq 56(%0), %%mm3\n"
		"  movq %%mm0, 32(%1)\n"
		"  movq %%mm1, 40(%1)\n"
		"  movq %%mm2, 48(%1)\n"
		"  movq %%mm3, 56(%1)\n"
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	/*
	 * Now do the tail of the block:
	 */
	__memcpy(to, from, len & 63);
	kernel_fpu_end();

	return p;
}
EXPORT_SYMBOL(_mmx_memcpy);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118
119#ifdef CONFIG_MK7
120
121/*
122 * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
123 * other MMX using processors do not.
124 */
125
/*
 * Zero one 4096-byte page using K7 (Athlon) non-temporal movntq
 * stores, 64 bytes per loop iteration, so the zeroed page does not
 * displace useful data from the cache.
 */
static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin();

	/* %mm0 = 0 — the pattern every movntq below writes out: */
	__asm__ __volatile__ (
		"  pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"  movntq %%mm0, (%0)\n"
		"  movntq %%mm0, 8(%0)\n"
		"  movntq %%mm0, 16(%0)\n"
		"  movntq %%mm0, 24(%0)\n"
		"  movntq %%mm0, 32(%0)\n"
		"  movntq %%mm0, 40(%0)\n"
		"  movntq %%mm0, 48(%0)\n"
		"  movntq %%mm0, 56(%0)\n"
		: : "r" (page) : "memory");
		page += 64;
	}

	/*
	 * Since movntq is weakly-ordered, a "sfence" is needed to become
	 * ordered again:
	 */
	__asm__ __volatile__("sfence\n"::);

	kernel_fpu_end();
}
158
/*
 * Copy one 4096-byte page with MMX loads and K7 non-temporal movntq
 * stores: reads go through the cache, writes bypass it.  The source is
 * prefetched 320 bytes ahead; the copy loop is split so that the
 * lookahead prefetch stops 320 bytes before the end of the page and
 * never touches the page that follows.
 */
static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin();

	/*
	 * maybe the prefetch stuff can go before the expensive fnsave...
	 * but that is for later. -AV
	 */
	/*
	 * Prime the cache with the first five source lines.  A faulting
	 * 3DNow! prefetch is patched by the .fixup code into a short
	 * jump over the whole set (0x1AEB == "jmp +26"):
	 */
	__asm__ __volatile__(
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

	/* Main loop: 64 bytes per iteration, prefetching 320 ahead. */
	for (i = 0; i < (4096-320)/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}

	/* Last 320 bytes of the page: same copy, no lookahead prefetch. */
	for (i = (4096-320)/64; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"2: movq (%0), %%mm0\n"
		"   movntq %%mm0, (%1)\n"
		"   movq 8(%0), %%mm1\n"
		"   movntq %%mm1, 8(%1)\n"
		"   movq 16(%0), %%mm2\n"
		"   movntq %%mm2, 16(%1)\n"
		"   movq 24(%0), %%mm3\n"
		"   movntq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm4\n"
		"   movntq %%mm4, 32(%1)\n"
		"   movq 40(%0), %%mm5\n"
		"   movntq %%mm5, 40(%1)\n"
		"   movq 48(%0), %%mm6\n"
		"   movntq %%mm6, 48(%1)\n"
		"   movq 56(%0), %%mm7\n"
		"   movntq %%mm7, 56(%1)\n"
			: : "r" (from), "r" (to) : "memory");
		from += 64;
		to += 64;
	}
	/*
	 * Since movntq is weakly-ordered, a "sfence" is needed to become
	 * ordered again:
	 */
	__asm__ __volatile__("sfence \n"::);
	kernel_fpu_end();
}
240
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100241#else /* CONFIG_MK7 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242
243/*
244 * Generic MMX implementation without K7 specific streaming
245 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246static void fast_clear_page(void *page)
247{
248 int i;
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100249
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 kernel_fpu_begin();
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100251
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 __asm__ __volatile__ (
253 " pxor %%mm0, %%mm0\n" : :
254 );
255
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100256 for (i = 0; i < 4096/128; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 __asm__ __volatile__ (
258 " movq %%mm0, (%0)\n"
259 " movq %%mm0, 8(%0)\n"
260 " movq %%mm0, 16(%0)\n"
261 " movq %%mm0, 24(%0)\n"
262 " movq %%mm0, 32(%0)\n"
263 " movq %%mm0, 40(%0)\n"
264 " movq %%mm0, 48(%0)\n"
265 " movq %%mm0, 56(%0)\n"
266 " movq %%mm0, 64(%0)\n"
267 " movq %%mm0, 72(%0)\n"
268 " movq %%mm0, 80(%0)\n"
269 " movq %%mm0, 88(%0)\n"
270 " movq %%mm0, 96(%0)\n"
271 " movq %%mm0, 104(%0)\n"
272 " movq %%mm0, 112(%0)\n"
273 " movq %%mm0, 120(%0)\n"
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100274 : : "r" (page) : "memory");
275 page += 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 }
277
278 kernel_fpu_end();
279}
280
/*
 * Generic-MMX page copy (non-K7 CPUs): copy a 4096-byte page 64 bytes
 * per iteration through the MMX registers with ordinary movq stores,
 * prefetching the source 320 bytes ahead.  Faulting 3DNow! prefetch
 * instructions are patched out at run time via the .fixup sections.
 */
static void fast_copy_page(void *to, void *from)
{
	int i;

	kernel_fpu_begin();

	/*
	 * Prime the cache with the first five source lines; a faulting
	 * prefetch is rewritten into "jmp +26" (0x1AEB) over the set:
	 */
	__asm__ __volatile__ (
		"1: prefetch (%0)\n"
		"   prefetch 64(%0)\n"
		"   prefetch 128(%0)\n"
		"   prefetch 192(%0)\n"
		"   prefetch 256(%0)\n"
		"2:  \n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b) : : "r" (from));

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		"1: prefetch 320(%0)\n"
		"2: movq (%0), %%mm0\n"
		"   movq 8(%0), %%mm1\n"
		"   movq 16(%0), %%mm2\n"
		"   movq 24(%0), %%mm3\n"
		"   movq %%mm0, (%1)\n"
		"   movq %%mm1, 8(%1)\n"
		"   movq %%mm2, 16(%1)\n"
		"   movq %%mm3, 24(%1)\n"
		"   movq 32(%0), %%mm0\n"
		"   movq 40(%0), %%mm1\n"
		"   movq 48(%0), %%mm2\n"
		"   movq 56(%0), %%mm3\n"
		"   movq %%mm0, 32(%1)\n"
		"   movq %%mm1, 40(%1)\n"
		"   movq %%mm2, 48(%1)\n"
		"   movq %%mm3, 56(%1)\n"
		".section .fixup, \"ax\"\n"
		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
		"   jmp 2b\n"
		".previous\n"
			_ASM_EXTABLE(1b, 3b)
			: : "r" (from), "r" (to) : "memory");

		from += 64;
		to += 64;
	}
	kernel_fpu_end();
}
331
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100332#endif /* !CONFIG_MK7 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
334/*
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100335 * Favour MMX for page clear and copy:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 */
/*
 * Interrupt-safe page clear: plain "rep ; stosl" of 1024 zero dwords
 * (4096 bytes).  Needs no FPU/MMX state, so it is usable where
 * kernel_fpu_begin() is not.  d0/d1 only receive the clobbered
 * ECX/EDI values from the asm constraints.
 */
static void slow_zero_page(void *page)
{
	int d0, d1;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"

		: "=&c" (d0), "=&D" (d1)
		:"a" (0), "1" (page), "0" (1024)
		:"memory");
}
Ingo Molnarca5d3f12008-02-18 08:53:56 +0100349
/*
 * Zero a 4096-byte page.  The MMX fast path requires
 * kernel_fpu_begin(), which is unusable in interrupt context, so a
 * plain rep;stosl clear is used there instead.
 */
void mmx_clear_page(void *page)
{
	if (likely(!in_interrupt()))
		fast_clear_page(page);
	else
		slow_zero_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
/*
 * Interrupt-safe page copy: plain "rep ; movsl" of 1024 dwords
 * (4096 bytes) from @from to @to.  Needs no FPU/MMX state.
 * d0/d1/d2 only receive the clobbered ECX/EDI/ESI values from the
 * asm constraints.
 */
static void slow_copy_page(void *to, void *from)
{
	int d0, d1, d2;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (1024), "1" ((long) to), "2" ((long) from)
		: "memory");
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
/*
 * Copy a 4096-byte page from @from to @to.  The MMX fast path
 * requires kernel_fpu_begin(), which is unusable in interrupt
 * context, so a plain rep;movsl copy is used there instead.
 */
void mmx_copy_page(void *to, void *from)
{
	if (likely(!in_interrupt()))
		fast_copy_page(to, from);
	else
		slow_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);