blob: 54e3f7eab8e9aecfa95c5df8378b7d7c22f455ab [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * arch/ia64/lib/xor.S
3 *
4 * Optimized RAID-5 checksumming functions for IA-64.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14 */
15
16#include <asm/asmmacro.h>
17
/*
 * xor_ia64_2: XOR two buffers 8 bytes at a time, writing the result over
 * the first buffer.
 *
 * Inputs (as used by the code below):
 *   in0 - element count source; shifted right by 3 before use, so it is
 *         presumably a byte count (TODO confirm against the C prototype).
 *         Any remainder below 8 is dropped by the shift.
 *   in1 - first buffer: read through r16 and overwritten through r8.
 *   in2 - second buffer: read through r17.
 *
 * Static registers used: r8 (store pointer), r16/r17 (load pointers),
 * r29 (saved pr), r30 (saved ar.lc), r31 (saved ar.pfs).
 *
 * The loop is a modulo-scheduled br.ctop loop with 6+2 predicate stages:
 * stage 0 loads, stage 6 XORs, stage 7 stores.
 *
 * NOTE(review): there is no guard for a count below 8; in that case
 * in0 - 1 is -1 and is written to ar.lc unchecked. Callers presumably
 * never pass such a value - verify.
 */
18GLOBAL_ENTRY(xor_ia64_2)
19 .prologue
20 .fframe 0
21 .save ar.pfs, r31
 /* 3 inputs, 0 locals, 13 outputs, 16 rotating registers
  * (2 sources x 7 + 2 result registers = 16). */
22 alloc r31 = ar.pfs, 3, 0, 13, 16
23 .save ar.lc, r30
 /* ar.lc and pr are modified below, so keep copies for the exit path. */
24 mov r30 = ar.lc
25 .save pr, r29
26 mov r29 = pr
27 ;;
28 .body
 /* r8 = store pointer; results go back into the in1 buffer. */
29 mov r8 = in1
 /* Epilog count equals the number of stages named by .rotp below. */
30 mov ar.ec = 6 + 2
 /* in0 /= 8: number of 8-byte words to process. */
31 shr in0 = in0, 3
32 ;;
 /* br.ctop runs the kernel ar.lc + 1 times, hence words - 1. */
33 adds in0 = -1, in0
 /* Separate post-incremented load pointers for each source. */
34 mov r16 = in1
35 mov r17 = in2
36 ;;
37 mov ar.lc = in0
 /* Set p16 only, so just stage 0 is enabled on the first iteration. */
38 mov pr.rot = 1 << 16
39 ;;
 /* Name the rotating registers: 7 per source, 2 for the result,
  * and 8 stage predicates. */
40 .rotr s1[6+1], s2[6+1], d[2]
41 .rotp p[6+2]
420:
 /* Stage 0: load one word from each source, advancing by 8 bytes. */
43(p[0]) ld8.nta s1[0] = [r16], 8
44(p[0]) ld8.nta s2[0] = [r17], 8
 /* Stage 6: combine the words loaded six rotations earlier. */
45(p[6]) xor d[0] = s1[6], s2[6]
 /* Stage 7: store d[1], i.e. the d[0] produced one rotation earlier. */
46(p[6+1])st8.nta [r8] = d[1], 8
 /* Presumably bundle filler with no architectural effect. */
47 nop.f 0
 /* Counted loop branch; rotates the registers and predicates. */
48 br.ctop.dptk.few 0b
49 ;;
 /* Restore the loop counter and all predicates, then return. */
50 mov ar.lc = r30
51 mov pr = r29, -1
52 br.ret.sptk.few rp
53END(xor_ia64_2)
54
/*
 * xor_ia64_3: XOR three buffers 8 bytes at a time, writing the result
 * over the first buffer.
 *
 * Inputs (as used by the code below):
 *   in0 - element count source; shifted right by 3 before use, so it is
 *         presumably a byte count (TODO confirm against the C prototype).
 *         Any remainder below 8 is dropped by the shift.
 *   in1 - first buffer: read through r16 and overwritten through r8.
 *   in2 - second buffer: read through r17.
 *   in3 - third buffer: read through r18.
 *
 * Static registers used: r8 (store pointer), r16-r18 (load pointers),
 * r29 (saved pr), r30 (saved ar.lc), r31 (saved ar.pfs).
 *
 * The loop is a modulo-scheduled br.ctop loop with 6+2 predicate stages.
 * Each iteration is split into two instruction groups by a stop (;;).
 *
 * NOTE(review): there is no guard for a count below 8; in that case
 * in0 - 1 is -1 and is written to ar.lc unchecked. Callers presumably
 * never pass such a value - verify.
 */
55GLOBAL_ENTRY(xor_ia64_3)
56 .prologue
57 .fframe 0
58 .save ar.pfs, r31
 /* 4 inputs, 0 locals, 20 outputs, 24 rotating registers
  * (3 sources x 7 + 2 result registers = 23 named below). */
59 alloc r31 = ar.pfs, 4, 0, 20, 24
60 .save ar.lc, r30
 /* ar.lc and pr are modified below, so keep copies for the exit path. */
61 mov r30 = ar.lc
62 .save pr, r29
63 mov r29 = pr
64 ;;
65 .body
 /* r8 = store pointer; results go back into the in1 buffer. */
66 mov r8 = in1
 /* Epilog count equals the number of stages named by .rotp below. */
67 mov ar.ec = 6 + 2
 /* in0 /= 8: number of 8-byte words to process. */
68 shr in0 = in0, 3
69 ;;
 /* br.ctop runs the kernel ar.lc + 1 times, hence words - 1. */
70 adds in0 = -1, in0
 /* Separate post-incremented load pointers for each source. */
71 mov r16 = in1
72 mov r17 = in2
73 ;;
74 mov r18 = in3
75 mov ar.lc = in0
 /* Set p16 only, so just stage 0 is enabled on the first iteration. */
76 mov pr.rot = 1 << 16
77 ;;
 /* Name the rotating registers: 7 per source, 2 for the result,
  * and 8 stage predicates. */
78 .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
79 .rotp p[6+2]
800:
 /* Group 1, stage 0: load from the first two sources. */
81(p[0]) ld8.nta s1[0] = [r16], 8
82(p[0]) ld8.nta s2[0] = [r17], 8
 /* Group 1, stage 6: partial result of the first two sources. */
83(p[6]) xor d[0] = s1[6], s2[6]
 /* Stop: the second xor below reads the d[0] written just above. */
84 ;;
 /* Group 2, stage 0: load from the third source. */
85(p[0]) ld8.nta s3[0] = [r18], 8
 /* Stage 7: store d[1], i.e. the final d[0] of the previous rotation. */
86(p[6+1])st8.nta [r8] = d[1], 8
 /* Group 2, stage 6: fold in the third source to finish d[0]. */
87(p[6]) xor d[0] = d[0], s3[6]
 /* Counted loop branch; rotates the registers and predicates. */
88 br.ctop.dptk.few 0b
89 ;;
 /* Restore the loop counter and all predicates, then return. */
90 mov ar.lc = r30
91 mov pr = r29, -1
92 br.ret.sptk.few rp
93END(xor_ia64_3)
94
/*
 * xor_ia64_4: XOR four buffers 8 bytes at a time, writing the result
 * over the first buffer.
 *
 * Inputs (as used by the code below):
 *   in0 - element count source; shifted right by 3 before use, so it is
 *         presumably a byte count (TODO confirm against the C prototype).
 *         Any remainder below 8 is dropped by the shift.
 *   in1 - first buffer: read through r16 and overwritten through r8.
 *   in2 - second buffer: read through r17.
 *   in3 - third buffer: read through r18.
 *   in4 - fourth buffer: read through r19.
 *
 * Static registers used: r8 (store pointer), r16-r19 (load pointers),
 * r20 (scratch for the s3 ^ s4 partial result), r29 (saved pr),
 * r30 (saved ar.lc), r31 (saved ar.pfs).
 *
 * The loop is a modulo-scheduled br.ctop loop with 6+2 predicate stages.
 * Each iteration is split into two instruction groups by a stop (;;).
 *
 * NOTE(review): there is no guard for a count below 8; in that case
 * in0 - 1 is -1 and is written to ar.lc unchecked. Callers presumably
 * never pass such a value - verify.
 */
95GLOBAL_ENTRY(xor_ia64_4)
96 .prologue
97 .fframe 0
98 .save ar.pfs, r31
 /* 5 inputs, 0 locals, 27 outputs, 32 rotating registers
  * (4 sources x 7 + 2 result registers = 30 named below). */
99 alloc r31 = ar.pfs, 5, 0, 27, 32
100 .save ar.lc, r30
 /* ar.lc and pr are modified below, so keep copies for the exit path. */
101 mov r30 = ar.lc
102 .save pr, r29
103 mov r29 = pr
104 ;;
105 .body
 /* r8 = store pointer; results go back into the in1 buffer. */
106 mov r8 = in1
 /* Epilog count equals the number of stages named by .rotp below. */
107 mov ar.ec = 6 + 2
 /* in0 /= 8: number of 8-byte words to process. */
108 shr in0 = in0, 3
109 ;;
 /* br.ctop runs the kernel ar.lc + 1 times, hence words - 1. */
110 adds in0 = -1, in0
 /* Separate post-incremented load pointers for each source. */
111 mov r16 = in1
112 mov r17 = in2
113 ;;
114 mov r18 = in3
115 mov ar.lc = in0
 /* Set p16 only, so just stage 0 is enabled on the first iteration. */
116 mov pr.rot = 1 << 16
117 mov r19 = in4
118 ;;
 /* Name the rotating registers: 7 per source, 2 for the result,
  * and 8 stage predicates. */
119 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
120 .rotp p[6+2]
1210:
 /* Group 1, stage 0: load one word from each of the four sources.
  * Group 1, stage 6: two independent partial XORs (d[0] and r20). */
122(p[0]) ld8.nta s1[0] = [r16], 8
123(p[0]) ld8.nta s2[0] = [r17], 8
124(p[6]) xor d[0] = s1[6], s2[6]
125(p[0]) ld8.nta s3[0] = [r18], 8
126(p[0]) ld8.nta s4[0] = [r19], 8
 /* r20 is a static register: it is consumed after the stop below,
  * within the same iteration, so it need not rotate. */
127(p[6]) xor r20 = s3[6], s4[6]
 /* Stop: the final xor reads d[0] and r20 written just above. */
128 ;;
 /* Stage 7: store d[1], i.e. the final d[0] of the previous rotation. */
129(p[6+1])st8.nta [r8] = d[1], 8
 /* Group 2, stage 6: merge the two partial results into d[0]. */
130(p[6]) xor d[0] = d[0], r20
 /* Counted loop branch; rotates the registers and predicates. */
131 br.ctop.dptk.few 0b
132 ;;
 /* Restore the loop counter and all predicates, then return. */
133 mov ar.lc = r30
134 mov pr = r29, -1
135 br.ret.sptk.few rp
136END(xor_ia64_4)
137
/*
 * xor_ia64_5: XOR five buffers 8 bytes at a time, writing the result
 * over the first buffer.
 *
 * Inputs (as used by the code below):
 *   in0 - element count source; shifted right by 3 before use, so it is
 *         presumably a byte count (TODO confirm against the C prototype).
 *         Any remainder below 8 is dropped by the shift.
 *   in1 - first buffer: read through r16 and overwritten through r8.
 *   in2 - second buffer: read through r17.
 *   in3 - third buffer: read through r18.
 *   in4 - fourth buffer: read through r19.
 *   in5 - fifth buffer: read through r20.
 *
 * Static registers used: r8 (store pointer), r16-r20 (load pointers),
 * r21 (scratch for the s3 ^ s4 partial result), r29 (saved pr),
 * r30 (saved ar.lc), r31 (saved ar.pfs).
 *
 * The loop is a modulo-scheduled br.ctop loop with 6+2 predicate stages.
 * Each iteration is split into three instruction groups by stops (;;).
 *
 * NOTE(review): there is no guard for a count below 8; in that case
 * in0 - 1 is -1 and is written to ar.lc unchecked. Callers presumably
 * never pass such a value - verify.
 */
138GLOBAL_ENTRY(xor_ia64_5)
139 .prologue
140 .fframe 0
141 .save ar.pfs, r31
 /* 6 inputs, 0 locals, 34 outputs, 40 rotating registers
  * (5 sources x 7 + 2 result registers = 37 named below). */
142 alloc r31 = ar.pfs, 6, 0, 34, 40
143 .save ar.lc, r30
 /* ar.lc and pr are modified below, so keep copies for the exit path. */
144 mov r30 = ar.lc
145 .save pr, r29
146 mov r29 = pr
147 ;;
148 .body
 /* r8 = store pointer; results go back into the in1 buffer. */
149 mov r8 = in1
 /* Epilog count equals the number of stages named by .rotp below. */
150 mov ar.ec = 6 + 2
 /* in0 /= 8: number of 8-byte words to process. */
151 shr in0 = in0, 3
152 ;;
 /* br.ctop runs the kernel ar.lc + 1 times, hence words - 1. */
153 adds in0 = -1, in0
 /* Separate post-incremented load pointers for each source. */
154 mov r16 = in1
155 mov r17 = in2
156 ;;
157 mov r18 = in3
158 mov ar.lc = in0
 /* Set p16 only, so just stage 0 is enabled on the first iteration. */
159 mov pr.rot = 1 << 16
160 mov r19 = in4
161 mov r20 = in5
162 ;;
 /* Name the rotating registers: 7 per source, 2 for the result,
  * and 8 stage predicates. */
163 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
164 .rotp p[6+2]
1650:
 /* Group 1, stage 0: load from the first four sources.
  * Group 1, stage 6: two independent partial XORs (d[0] and r21). */
166(p[0]) ld8.nta s1[0] = [r16], 8
167(p[0]) ld8.nta s2[0] = [r17], 8
168(p[6]) xor d[0] = s1[6], s2[6]
169(p[0]) ld8.nta s3[0] = [r18], 8
170(p[0]) ld8.nta s4[0] = [r19], 8
 /* r21 is a static register: it is consumed after the next stop,
  * within the same iteration, so it need not rotate. */
171(p[6]) xor r21 = s3[6], s4[6]
 /* Stop: the next xor reads d[0] and r21 written just above. */
172 ;;
 /* Group 2, stage 0: load from the fifth source. */
173(p[0]) ld8.nta s5[0] = [r20], 8
 /* Stage 7: store d[1], i.e. the final d[0] of the previous rotation. */
174(p[6+1])st8.nta [r8] = d[1], 8
 /* Group 2, stage 6: merge the two partial results. */
175(p[6]) xor d[0] = d[0], r21
 /* Stop: the last xor reads the d[0] written just above. */
176 ;;
 /* Group 3, stage 6: fold in the fifth source to finish d[0]. */
177(p[6]) xor d[0] = d[0], s5[6]
 /* Presumably bundle filler with no architectural effect. */
178 nop.f 0
 /* Counted loop branch; rotates the registers and predicates. */
179 br.ctop.dptk.few 0b
180 ;;
 /* Restore the loop counter and all predicates, then return. */
181 mov ar.lc = r30
182 mov pr = r29, -1
183 br.ret.sptk.few rp
184END(xor_ia64_5)