blob: 5ee1c2bc0499b983268f3d22d2285b75c9e7307b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * include/asm-alpha/xor.h
3 *
4 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14 */
15
16extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
17extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
18 unsigned long *);
19extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
20 unsigned long *, unsigned long *);
21extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
22 unsigned long *, unsigned long *, unsigned long *);
23
24extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
25 unsigned long *);
26extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
27 unsigned long *, unsigned long *);
28extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
29 unsigned long *, unsigned long *,
30 unsigned long *);
31extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
32 unsigned long *, unsigned long *,
33 unsigned long *, unsigned long *);
34
35asm(" \n\
36 .text \n\
37 .align 3 \n\
38 .ent xor_alpha_2 \n\
39xor_alpha_2: \n\
40 .prologue 0 \n\
41 srl $16, 6, $16 \n\
42 .align 4 \n\
432: \n\
44 ldq $0,0($17) \n\
45 ldq $1,0($18) \n\
46 ldq $2,8($17) \n\
47 ldq $3,8($18) \n\
48 \n\
49 ldq $4,16($17) \n\
50 ldq $5,16($18) \n\
51 ldq $6,24($17) \n\
52 ldq $7,24($18) \n\
53 \n\
54 ldq $19,32($17) \n\
55 ldq $20,32($18) \n\
56 ldq $21,40($17) \n\
57 ldq $22,40($18) \n\
58 \n\
59 ldq $23,48($17) \n\
60 ldq $24,48($18) \n\
61 ldq $25,56($17) \n\
62 xor $0,$1,$0 # 7 cycles from $1 load \n\
63 \n\
64 ldq $27,56($18) \n\
65 xor $2,$3,$2 \n\
66 stq $0,0($17) \n\
67 xor $4,$5,$4 \n\
68 \n\
69 stq $2,8($17) \n\
70 xor $6,$7,$6 \n\
71 stq $4,16($17) \n\
72 xor $19,$20,$19 \n\
73 \n\
74 stq $6,24($17) \n\
75 xor $21,$22,$21 \n\
76 stq $19,32($17) \n\
77 xor $23,$24,$23 \n\
78 \n\
79 stq $21,40($17) \n\
80 xor $25,$27,$25 \n\
81 stq $23,48($17) \n\
82 subq $16,1,$16 \n\
83 \n\
84 stq $25,56($17) \n\
85 addq $17,64,$17 \n\
86 addq $18,64,$18 \n\
87 bgt $16,2b \n\
88 \n\
89 ret \n\
90 .end xor_alpha_2 \n\
91 \n\
92 .align 3 \n\
93 .ent xor_alpha_3 \n\
94xor_alpha_3: \n\
95 .prologue 0 \n\
96 srl $16, 6, $16 \n\
97 .align 4 \n\
983: \n\
99 ldq $0,0($17) \n\
100 ldq $1,0($18) \n\
101 ldq $2,0($19) \n\
102 ldq $3,8($17) \n\
103 \n\
104 ldq $4,8($18) \n\
105 ldq $6,16($17) \n\
106 ldq $7,16($18) \n\
107 ldq $21,24($17) \n\
108 \n\
109 ldq $22,24($18) \n\
110 ldq $24,32($17) \n\
111 ldq $25,32($18) \n\
112 ldq $5,8($19) \n\
113 \n\
114 ldq $20,16($19) \n\
115 ldq $23,24($19) \n\
116 ldq $27,32($19) \n\
117 nop \n\
118 \n\
119 xor $0,$1,$1 # 8 cycles from $0 load \n\
120 xor $3,$4,$4 # 6 cycles from $4 load \n\
121 xor $6,$7,$7 # 6 cycles from $7 load \n\
122 xor $21,$22,$22 # 5 cycles from $22 load \n\
123 \n\
124 xor $1,$2,$2 # 9 cycles from $2 load \n\
125 xor $24,$25,$25 # 5 cycles from $25 load \n\
126 stq $2,0($17) \n\
127 xor $4,$5,$5 # 6 cycles from $5 load \n\
128 \n\
129 stq $5,8($17) \n\
130 xor $7,$20,$20 # 7 cycles from $20 load \n\
131 stq $20,16($17) \n\
132 xor $22,$23,$23 # 7 cycles from $23 load \n\
133 \n\
134 stq $23,24($17) \n\
135 xor $25,$27,$27 # 7 cycles from $27 load \n\
136 stq $27,32($17) \n\
137 nop \n\
138 \n\
139 ldq $0,40($17) \n\
140 ldq $1,40($18) \n\
141 ldq $3,48($17) \n\
142 ldq $4,48($18) \n\
143 \n\
144 ldq $6,56($17) \n\
145 ldq $7,56($18) \n\
146 ldq $2,40($19) \n\
147 ldq $5,48($19) \n\
148 \n\
149 ldq $20,56($19) \n\
150 xor $0,$1,$1 # 4 cycles from $1 load \n\
151 xor $3,$4,$4 # 5 cycles from $4 load \n\
152 xor $6,$7,$7 # 5 cycles from $7 load \n\
153 \n\
154 xor $1,$2,$2 # 4 cycles from $2 load \n\
155 xor $4,$5,$5 # 5 cycles from $5 load \n\
156 stq $2,40($17) \n\
157 xor $7,$20,$20 # 4 cycles from $20 load \n\
158 \n\
159 stq $5,48($17) \n\
160 subq $16,1,$16 \n\
161 stq $20,56($17) \n\
162 addq $19,64,$19 \n\
163 \n\
164 addq $18,64,$18 \n\
165 addq $17,64,$17 \n\
166 bgt $16,3b \n\
167 ret \n\
168 .end xor_alpha_3 \n\
169 \n\
170 .align 3 \n\
171 .ent xor_alpha_4 \n\
172xor_alpha_4: \n\
173 .prologue 0 \n\
174 srl $16, 6, $16 \n\
175 .align 4 \n\
1764: \n\
177 ldq $0,0($17) \n\
178 ldq $1,0($18) \n\
179 ldq $2,0($19) \n\
180 ldq $3,0($20) \n\
181 \n\
182 ldq $4,8($17) \n\
183 ldq $5,8($18) \n\
184 ldq $6,8($19) \n\
185 ldq $7,8($20) \n\
186 \n\
187 ldq $21,16($17) \n\
188 ldq $22,16($18) \n\
189 ldq $23,16($19) \n\
190 ldq $24,16($20) \n\
191 \n\
192 ldq $25,24($17) \n\
193 xor $0,$1,$1 # 6 cycles from $1 load \n\
194 ldq $27,24($18) \n\
195 xor $2,$3,$3 # 6 cycles from $3 load \n\
196 \n\
197 ldq $0,24($19) \n\
198 xor $1,$3,$3 \n\
199 ldq $1,24($20) \n\
200 xor $4,$5,$5 # 7 cycles from $5 load \n\
201 \n\
202 stq $3,0($17) \n\
203 xor $6,$7,$7 \n\
204 xor $21,$22,$22 # 7 cycles from $22 load \n\
205 xor $5,$7,$7 \n\
206 \n\
207 stq $7,8($17) \n\
208 xor $23,$24,$24 # 7 cycles from $24 load \n\
209 ldq $2,32($17) \n\
210 xor $22,$24,$24 \n\
211 \n\
212 ldq $3,32($18) \n\
213 ldq $4,32($19) \n\
214 ldq $5,32($20) \n\
215 xor $25,$27,$27 # 8 cycles from $27 load \n\
216 \n\
217 ldq $6,40($17) \n\
218 ldq $7,40($18) \n\
219 ldq $21,40($19) \n\
220 ldq $22,40($20) \n\
221 \n\
222 stq $24,16($17) \n\
223 xor $0,$1,$1 # 9 cycles from $1 load \n\
224 xor $2,$3,$3 # 5 cycles from $3 load \n\
225 xor $27,$1,$1 \n\
226 \n\
227 stq $1,24($17) \n\
228 xor $4,$5,$5 # 5 cycles from $5 load \n\
229 ldq $23,48($17) \n\
230 ldq $24,48($18) \n\
231 \n\
232 ldq $25,48($19) \n\
233 xor $3,$5,$5 \n\
234 ldq $27,48($20) \n\
235 ldq $0,56($17) \n\
236 \n\
237 ldq $1,56($18) \n\
238 ldq $2,56($19) \n\
239 xor $6,$7,$7 # 8 cycles from $6 load \n\
240 ldq $3,56($20) \n\
241 \n\
242 stq $5,32($17) \n\
243 xor $21,$22,$22 # 8 cycles from $22 load \n\
244 xor $7,$22,$22 \n\
245 xor $23,$24,$24 # 5 cycles from $24 load \n\
246 \n\
247 stq $22,40($17) \n\
248 xor $25,$27,$27 # 5 cycles from $27 load \n\
249 xor $24,$27,$27 \n\
250 xor $0,$1,$1 # 5 cycles from $1 load \n\
251 \n\
252 stq $27,48($17) \n\
253 xor $2,$3,$3 # 4 cycles from $3 load \n\
254 xor $1,$3,$3 \n\
255 subq $16,1,$16 \n\
256 \n\
257 stq $3,56($17) \n\
258 addq $20,64,$20 \n\
259 addq $19,64,$19 \n\
260 addq $18,64,$18 \n\
261 \n\
262 addq $17,64,$17 \n\
263 bgt $16,4b \n\
264 ret \n\
265 .end xor_alpha_4 \n\
266 \n\
267 .align 3 \n\
268 .ent xor_alpha_5 \n\
269xor_alpha_5: \n\
270 .prologue 0 \n\
271 srl $16, 6, $16 \n\
272 .align 4 \n\
2735: \n\
274 ldq $0,0($17) \n\
275 ldq $1,0($18) \n\
276 ldq $2,0($19) \n\
277 ldq $3,0($20) \n\
278 \n\
279 ldq $4,0($21) \n\
280 ldq $5,8($17) \n\
281 ldq $6,8($18) \n\
282 ldq $7,8($19) \n\
283 \n\
284 ldq $22,8($20) \n\
285 ldq $23,8($21) \n\
286 ldq $24,16($17) \n\
287 ldq $25,16($18) \n\
288 \n\
289 ldq $27,16($19) \n\
290 xor $0,$1,$1 # 6 cycles from $1 load \n\
291 ldq $28,16($20) \n\
292 xor $2,$3,$3 # 6 cycles from $3 load \n\
293 \n\
294 ldq $0,16($21) \n\
295 xor $1,$3,$3 \n\
296 ldq $1,24($17) \n\
297 xor $3,$4,$4 # 7 cycles from $4 load \n\
298 \n\
299 stq $4,0($17) \n\
300 xor $5,$6,$6 # 7 cycles from $6 load \n\
301 xor $7,$22,$22 # 7 cycles from $22 load \n\
302 xor $6,$23,$23 # 7 cycles from $23 load \n\
303 \n\
304 ldq $2,24($18) \n\
305 xor $22,$23,$23 \n\
306 ldq $3,24($19) \n\
307 xor $24,$25,$25 # 8 cycles from $25 load \n\
308 \n\
309 stq $23,8($17) \n\
310 xor $25,$27,$27 # 8 cycles from $27 load \n\
311 ldq $4,24($20) \n\
312 xor $28,$0,$0 # 7 cycles from $0 load \n\
313 \n\
314 ldq $5,24($21) \n\
315 xor $27,$0,$0 \n\
316 ldq $6,32($17) \n\
317 ldq $7,32($18) \n\
318 \n\
319 stq $0,16($17) \n\
320 xor $1,$2,$2 # 6 cycles from $2 load \n\
321 ldq $22,32($19) \n\
322 xor $3,$4,$4 # 4 cycles from $4 load \n\
323 \n\
324 ldq $23,32($20) \n\
325 xor $2,$4,$4 \n\
326 ldq $24,32($21) \n\
327 ldq $25,40($17) \n\
328 \n\
329 ldq $27,40($18) \n\
330 ldq $28,40($19) \n\
331 ldq $0,40($20) \n\
332 xor $4,$5,$5 # 7 cycles from $5 load \n\
333 \n\
334 stq $5,24($17) \n\
335 xor $6,$7,$7 # 7 cycles from $7 load \n\
336 ldq $1,40($21) \n\
337 ldq $2,48($17) \n\
338 \n\
339 ldq $3,48($18) \n\
340 xor $7,$22,$22 # 7 cycles from $22 load \n\
341 ldq $4,48($19) \n\
342 xor $23,$24,$24 # 6 cycles from $24 load \n\
343 \n\
344 ldq $5,48($20) \n\
345 xor $22,$24,$24 \n\
346 ldq $6,48($21) \n\
347 xor $25,$27,$27 # 7 cycles from $27 load \n\
348 \n\
349 stq $24,32($17) \n\
350 xor $27,$28,$28 # 8 cycles from $28 load \n\
351 ldq $7,56($17) \n\
352 xor $0,$1,$1 # 6 cycles from $1 load \n\
353 \n\
354 ldq $22,56($18) \n\
355 ldq $23,56($19) \n\
356 ldq $24,56($20) \n\
357 ldq $25,56($21) \n\
358 \n\
359 xor $28,$1,$1 \n\
360 xor $2,$3,$3 # 9 cycles from $3 load \n\
361 xor $3,$4,$4 # 9 cycles from $4 load \n\
362 xor $5,$6,$6 # 8 cycles from $6 load \n\
363 \n\
364 stq $1,40($17) \n\
365 xor $4,$6,$6 \n\
366 xor $7,$22,$22 # 7 cycles from $22 load \n\
367 xor $23,$24,$24 # 6 cycles from $24 load \n\
368 \n\
369 stq $6,48($17) \n\
370 xor $22,$24,$24 \n\
371 subq $16,1,$16 \n\
372 xor $24,$25,$25 # 8 cycles from $25 load \n\
373 \n\
374 stq $25,56($17) \n\
375 addq $21,64,$21 \n\
376 addq $20,64,$20 \n\
377 addq $19,64,$19 \n\
378 \n\
379 addq $18,64,$18 \n\
380 addq $17,64,$17 \n\
381 bgt $16,5b \n\
382 ret \n\
383 .end xor_alpha_5 \n\
384 \n\
385 .align 3 \n\
386 .ent xor_alpha_prefetch_2 \n\
387xor_alpha_prefetch_2: \n\
388 .prologue 0 \n\
389 srl $16, 6, $16 \n\
390 \n\
391 ldq $31, 0($17) \n\
392 ldq $31, 0($18) \n\
393 \n\
394 ldq $31, 64($17) \n\
395 ldq $31, 64($18) \n\
396 \n\
397 ldq $31, 128($17) \n\
398 ldq $31, 128($18) \n\
399 \n\
400 ldq $31, 192($17) \n\
401 ldq $31, 192($18) \n\
402 .align 4 \n\
4032: \n\
404 ldq $0,0($17) \n\
405 ldq $1,0($18) \n\
406 ldq $2,8($17) \n\
407 ldq $3,8($18) \n\
408 \n\
409 ldq $4,16($17) \n\
410 ldq $5,16($18) \n\
411 ldq $6,24($17) \n\
412 ldq $7,24($18) \n\
413 \n\
414 ldq $19,32($17) \n\
415 ldq $20,32($18) \n\
416 ldq $21,40($17) \n\
417 ldq $22,40($18) \n\
418 \n\
419 ldq $23,48($17) \n\
420 ldq $24,48($18) \n\
421 ldq $25,56($17) \n\
422 ldq $27,56($18) \n\
423 \n\
424 ldq $31,256($17) \n\
425 xor $0,$1,$0 # 8 cycles from $1 load \n\
426 ldq $31,256($18) \n\
427 xor $2,$3,$2 \n\
428 \n\
429 stq $0,0($17) \n\
430 xor $4,$5,$4 \n\
431 stq $2,8($17) \n\
432 xor $6,$7,$6 \n\
433 \n\
434 stq $4,16($17) \n\
435 xor $19,$20,$19 \n\
436 stq $6,24($17) \n\
437 xor $21,$22,$21 \n\
438 \n\
439 stq $19,32($17) \n\
440 xor $23,$24,$23 \n\
441 stq $21,40($17) \n\
442 xor $25,$27,$25 \n\
443 \n\
444 stq $23,48($17) \n\
445 subq $16,1,$16 \n\
446 stq $25,56($17) \n\
447 addq $17,64,$17 \n\
448 \n\
449 addq $18,64,$18 \n\
450 bgt $16,2b \n\
451 ret \n\
452 .end xor_alpha_prefetch_2 \n\
453 \n\
454 .align 3 \n\
455 .ent xor_alpha_prefetch_3 \n\
456xor_alpha_prefetch_3: \n\
457 .prologue 0 \n\
458 srl $16, 6, $16 \n\
459 \n\
460 ldq $31, 0($17) \n\
461 ldq $31, 0($18) \n\
462 ldq $31, 0($19) \n\
463 \n\
464 ldq $31, 64($17) \n\
465 ldq $31, 64($18) \n\
466 ldq $31, 64($19) \n\
467 \n\
468 ldq $31, 128($17) \n\
469 ldq $31, 128($18) \n\
470 ldq $31, 128($19) \n\
471 \n\
472 ldq $31, 192($17) \n\
473 ldq $31, 192($18) \n\
474 ldq $31, 192($19) \n\
475 .align 4 \n\
4763: \n\
477 ldq $0,0($17) \n\
478 ldq $1,0($18) \n\
479 ldq $2,0($19) \n\
480 ldq $3,8($17) \n\
481 \n\
482 ldq $4,8($18) \n\
483 ldq $6,16($17) \n\
484 ldq $7,16($18) \n\
485 ldq $21,24($17) \n\
486 \n\
487 ldq $22,24($18) \n\
488 ldq $24,32($17) \n\
489 ldq $25,32($18) \n\
490 ldq $5,8($19) \n\
491 \n\
492 ldq $20,16($19) \n\
493 ldq $23,24($19) \n\
494 ldq $27,32($19) \n\
495 nop \n\
496 \n\
497 xor $0,$1,$1 # 8 cycles from $0 load \n\
498 xor $3,$4,$4 # 7 cycles from $4 load \n\
499 xor $6,$7,$7 # 6 cycles from $7 load \n\
500 xor $21,$22,$22 # 5 cycles from $22 load \n\
501 \n\
502 xor $1,$2,$2 # 9 cycles from $2 load \n\
503 xor $24,$25,$25 # 5 cycles from $25 load \n\
504 stq $2,0($17) \n\
505 xor $4,$5,$5 # 6 cycles from $5 load \n\
506 \n\
507 stq $5,8($17) \n\
508 xor $7,$20,$20 # 7 cycles from $20 load \n\
509 stq $20,16($17) \n\
510 xor $22,$23,$23 # 7 cycles from $23 load \n\
511 \n\
512 stq $23,24($17) \n\
513 xor $25,$27,$27 # 7 cycles from $27 load \n\
514 stq $27,32($17) \n\
515 nop \n\
516 \n\
517 ldq $0,40($17) \n\
518 ldq $1,40($18) \n\
519 ldq $3,48($17) \n\
520 ldq $4,48($18) \n\
521 \n\
522 ldq $6,56($17) \n\
523 ldq $7,56($18) \n\
524 ldq $2,40($19) \n\
525 ldq $5,48($19) \n\
526 \n\
527 ldq $20,56($19) \n\
528 ldq $31,256($17) \n\
529 ldq $31,256($18) \n\
530 ldq $31,256($19) \n\
531 \n\
532 xor $0,$1,$1 # 6 cycles from $1 load \n\
533 xor $3,$4,$4 # 5 cycles from $4 load \n\
534 xor $6,$7,$7 # 5 cycles from $7 load \n\
535 xor $1,$2,$2 # 4 cycles from $2 load \n\
536 \n\
537 xor $4,$5,$5 # 5 cycles from $5 load \n\
538 xor $7,$20,$20 # 4 cycles from $20 load \n\
539 stq $2,40($17) \n\
540 subq $16,1,$16 \n\
541 \n\
542 stq $5,48($17) \n\
543 addq $19,64,$19 \n\
544 stq $20,56($17) \n\
545 addq $18,64,$18 \n\
546 \n\
547 addq $17,64,$17 \n\
548 bgt $16,3b \n\
549 ret \n\
550 .end xor_alpha_prefetch_3 \n\
551 \n\
552 .align 3 \n\
553 .ent xor_alpha_prefetch_4 \n\
554xor_alpha_prefetch_4: \n\
555 .prologue 0 \n\
556 srl $16, 6, $16 \n\
557 \n\
558 ldq $31, 0($17) \n\
559 ldq $31, 0($18) \n\
560 ldq $31, 0($19) \n\
561 ldq $31, 0($20) \n\
562 \n\
563 ldq $31, 64($17) \n\
564 ldq $31, 64($18) \n\
565 ldq $31, 64($19) \n\
566 ldq $31, 64($20) \n\
567 \n\
568 ldq $31, 128($17) \n\
569 ldq $31, 128($18) \n\
570 ldq $31, 128($19) \n\
571 ldq $31, 128($20) \n\
572 \n\
573 ldq $31, 192($17) \n\
574 ldq $31, 192($18) \n\
575 ldq $31, 192($19) \n\
576 ldq $31, 192($20) \n\
577 .align 4 \n\
5784: \n\
579 ldq $0,0($17) \n\
580 ldq $1,0($18) \n\
581 ldq $2,0($19) \n\
582 ldq $3,0($20) \n\
583 \n\
584 ldq $4,8($17) \n\
585 ldq $5,8($18) \n\
586 ldq $6,8($19) \n\
587 ldq $7,8($20) \n\
588 \n\
589 ldq $21,16($17) \n\
590 ldq $22,16($18) \n\
591 ldq $23,16($19) \n\
592 ldq $24,16($20) \n\
593 \n\
594 ldq $25,24($17) \n\
595 xor $0,$1,$1 # 6 cycles from $1 load \n\
596 ldq $27,24($18) \n\
597 xor $2,$3,$3 # 6 cycles from $3 load \n\
598 \n\
599 ldq $0,24($19) \n\
600 xor $1,$3,$3 \n\
601 ldq $1,24($20) \n\
602 xor $4,$5,$5 # 7 cycles from $5 load \n\
603 \n\
604 stq $3,0($17) \n\
605 xor $6,$7,$7 \n\
606 xor $21,$22,$22 # 7 cycles from $22 load \n\
607 xor $5,$7,$7 \n\
608 \n\
609 stq $7,8($17) \n\
610 xor $23,$24,$24 # 7 cycles from $24 load \n\
611 ldq $2,32($17) \n\
612 xor $22,$24,$24 \n\
613 \n\
614 ldq $3,32($18) \n\
615 ldq $4,32($19) \n\
616 ldq $5,32($20) \n\
617 xor $25,$27,$27 # 8 cycles from $27 load \n\
618 \n\
619 ldq $6,40($17) \n\
620 ldq $7,40($18) \n\
621 ldq $21,40($19) \n\
622 ldq $22,40($20) \n\
623 \n\
624 stq $24,16($17) \n\
625 xor $0,$1,$1 # 9 cycles from $1 load \n\
626 xor $2,$3,$3 # 5 cycles from $3 load \n\
627 xor $27,$1,$1 \n\
628 \n\
629 stq $1,24($17) \n\
630 xor $4,$5,$5 # 5 cycles from $5 load \n\
631 ldq $23,48($17) \n\
632 xor $3,$5,$5 \n\
633 \n\
634 ldq $24,48($18) \n\
635 ldq $25,48($19) \n\
636 ldq $27,48($20) \n\
637 ldq $0,56($17) \n\
638 \n\
639 ldq $1,56($18) \n\
640 ldq $2,56($19) \n\
641 ldq $3,56($20) \n\
642 xor $6,$7,$7 # 8 cycles from $6 load \n\
643 \n\
644 ldq $31,256($17) \n\
645 xor $21,$22,$22 # 8 cycles from $22 load \n\
646 ldq $31,256($18) \n\
647 xor $7,$22,$22 \n\
648 \n\
649 ldq $31,256($19) \n\
650 xor $23,$24,$24 # 6 cycles from $24 load \n\
651 ldq $31,256($20) \n\
652 xor $25,$27,$27 # 6 cycles from $27 load \n\
653 \n\
654 stq $5,32($17) \n\
655 xor $24,$27,$27 \n\
656 xor $0,$1,$1 # 7 cycles from $1 load \n\
657 xor $2,$3,$3 # 6 cycles from $3 load \n\
658 \n\
659 stq $22,40($17) \n\
660 xor $1,$3,$3 \n\
661 stq $27,48($17) \n\
662 subq $16,1,$16 \n\
663 \n\
664 stq $3,56($17) \n\
665 addq $20,64,$20 \n\
666 addq $19,64,$19 \n\
667 addq $18,64,$18 \n\
668 \n\
669 addq $17,64,$17 \n\
670 bgt $16,4b \n\
671 ret \n\
672 .end xor_alpha_prefetch_4 \n\
673 \n\
674 .align 3 \n\
675 .ent xor_alpha_prefetch_5 \n\
676xor_alpha_prefetch_5: \n\
677 .prologue 0 \n\
678 srl $16, 6, $16 \n\
679 \n\
680 ldq $31, 0($17) \n\
681 ldq $31, 0($18) \n\
682 ldq $31, 0($19) \n\
683 ldq $31, 0($20) \n\
684 ldq $31, 0($21) \n\
685 \n\
686 ldq $31, 64($17) \n\
687 ldq $31, 64($18) \n\
688 ldq $31, 64($19) \n\
689 ldq $31, 64($20) \n\
690 ldq $31, 64($21) \n\
691 \n\
692 ldq $31, 128($17) \n\
693 ldq $31, 128($18) \n\
694 ldq $31, 128($19) \n\
695 ldq $31, 128($20) \n\
696 ldq $31, 128($21) \n\
697 \n\
698 ldq $31, 192($17) \n\
699 ldq $31, 192($18) \n\
700 ldq $31, 192($19) \n\
701 ldq $31, 192($20) \n\
702 ldq $31, 192($21) \n\
703 .align 4 \n\
7045: \n\
705 ldq $0,0($17) \n\
706 ldq $1,0($18) \n\
707 ldq $2,0($19) \n\
708 ldq $3,0($20) \n\
709 \n\
710 ldq $4,0($21) \n\
711 ldq $5,8($17) \n\
712 ldq $6,8($18) \n\
713 ldq $7,8($19) \n\
714 \n\
715 ldq $22,8($20) \n\
716 ldq $23,8($21) \n\
717 ldq $24,16($17) \n\
718 ldq $25,16($18) \n\
719 \n\
720 ldq $27,16($19) \n\
721 xor $0,$1,$1 # 6 cycles from $1 load \n\
722 ldq $28,16($20) \n\
723 xor $2,$3,$3 # 6 cycles from $3 load \n\
724 \n\
725 ldq $0,16($21) \n\
726 xor $1,$3,$3 \n\
727 ldq $1,24($17) \n\
728 xor $3,$4,$4 # 7 cycles from $4 load \n\
729 \n\
730 stq $4,0($17) \n\
731 xor $5,$6,$6 # 7 cycles from $6 load \n\
732 xor $7,$22,$22 # 7 cycles from $22 load \n\
733 xor $6,$23,$23 # 7 cycles from $23 load \n\
734 \n\
735 ldq $2,24($18) \n\
736 xor $22,$23,$23 \n\
737 ldq $3,24($19) \n\
738 xor $24,$25,$25 # 8 cycles from $25 load \n\
739 \n\
740 stq $23,8($17) \n\
741 xor $25,$27,$27 # 8 cycles from $27 load \n\
742 ldq $4,24($20) \n\
743 xor $28,$0,$0 # 7 cycles from $0 load \n\
744 \n\
745 ldq $5,24($21) \n\
746 xor $27,$0,$0 \n\
747 ldq $6,32($17) \n\
748 ldq $7,32($18) \n\
749 \n\
750 stq $0,16($17) \n\
751 xor $1,$2,$2 # 6 cycles from $2 load \n\
752 ldq $22,32($19) \n\
753 xor $3,$4,$4 # 4 cycles from $4 load \n\
754 \n\
755 ldq $23,32($20) \n\
756 xor $2,$4,$4 \n\
757 ldq $24,32($21) \n\
758 ldq $25,40($17) \n\
759 \n\
760 ldq $27,40($18) \n\
761 ldq $28,40($19) \n\
762 ldq $0,40($20) \n\
763 xor $4,$5,$5 # 7 cycles from $5 load \n\
764 \n\
765 stq $5,24($17) \n\
766 xor $6,$7,$7 # 7 cycles from $7 load \n\
767 ldq $1,40($21) \n\
768 ldq $2,48($17) \n\
769 \n\
770 ldq $3,48($18) \n\
771 xor $7,$22,$22 # 7 cycles from $22 load \n\
772 ldq $4,48($19) \n\
773 xor $23,$24,$24 # 6 cycles from $24 load \n\
774 \n\
775 ldq $5,48($20) \n\
776 xor $22,$24,$24 \n\
777 ldq $6,48($21) \n\
778 xor $25,$27,$27 # 7 cycles from $27 load \n\
779 \n\
780 stq $24,32($17) \n\
781 xor $27,$28,$28 # 8 cycles from $28 load \n\
782 ldq $7,56($17) \n\
783 xor $0,$1,$1 # 6 cycles from $1 load \n\
784 \n\
785 ldq $22,56($18) \n\
786 ldq $23,56($19) \n\
787 ldq $24,56($20) \n\
788 ldq $25,56($21) \n\
789 \n\
790 ldq $31,256($17) \n\
791 xor $28,$1,$1 \n\
792 ldq $31,256($18) \n\
793 xor $2,$3,$3 # 9 cycles from $3 load \n\
794 \n\
795 ldq $31,256($19) \n\
796 xor $3,$4,$4 # 9 cycles from $4 load \n\
797 ldq $31,256($20) \n\
798 xor $5,$6,$6 # 8 cycles from $6 load \n\
799 \n\
800 stq $1,40($17) \n\
801 xor $4,$6,$6 \n\
802 xor $7,$22,$22 # 7 cycles from $22 load \n\
803 xor $23,$24,$24 # 6 cycles from $24 load \n\
804 \n\
805 stq $6,48($17) \n\
806 xor $22,$24,$24 \n\
807 ldq $31,256($21) \n\
808 xor $24,$25,$25 # 8 cycles from $25 load \n\
809 \n\
810 stq $25,56($17) \n\
811 subq $16,1,$16 \n\
812 addq $21,64,$21 \n\
813 addq $20,64,$20 \n\
814 \n\
815 addq $19,64,$19 \n\
816 addq $18,64,$18 \n\
817 addq $17,64,$17 \n\
818 bgt $16,5b \n\
819 \n\
820 ret \n\
821 .end xor_alpha_prefetch_5 \n\
822");
823
824static struct xor_block_template xor_block_alpha = {
825 .name = "alpha",
826 .do_2 = xor_alpha_2,
827 .do_3 = xor_alpha_3,
828 .do_4 = xor_alpha_4,
829 .do_5 = xor_alpha_5,
830};
831
832static struct xor_block_template xor_block_alpha_prefetch = {
833 .name = "alpha prefetch",
834 .do_2 = xor_alpha_prefetch_2,
835 .do_3 = xor_alpha_prefetch_3,
836 .do_4 = xor_alpha_prefetch_4,
837 .do_5 = xor_alpha_prefetch_5,
838};
839
840/* For grins, also test the generic routines. */
841#include <asm-generic/xor.h>
842
843#undef XOR_TRY_TEMPLATES
844#define XOR_TRY_TEMPLATES \
845 do { \
846 xor_speed(&xor_block_8regs); \
847 xor_speed(&xor_block_32regs); \
848 xor_speed(&xor_block_alpha); \
849 xor_speed(&xor_block_alpha_prefetch); \
850 } while (0)
851
852/* Force the use of alpha_prefetch if EV6, as it is significantly
853 faster in the cold cache case. */
854#define XOR_SELECT_TEMPLATE(FASTEST) \
855 (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)