blob: f0dba36578ea0765827efc68f9448dd38f12e6f0 [file] [log] [blame]
/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
Jan Beulich8d379da2006-09-26 10:52:32 +02008#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070011
/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT; zeroing the
 * destination is left to the wrappers.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int *, may be NULL)
 * r9   dst_err_ptr (int *, may be NULL)
 *
 * Output
 * eax  32bit sum (the 64bit accumulator folded down to 32 bits).
 *      Undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
31
/*
 * source - tag the next instruction as a load from the source buffer.
 *
 * Drops numeric local label 10 at the point of use (i.e. on the instruction
 * that follows the macro invocation) and emits an __ex_table entry pairing
 * that address with .Lbad_source, so a fault on that instruction is routed
 * to the source-error handler.
 *
 * Emits no instructions into the current section and clobbers nothing.
 */
.macro source
10:
	.section __ex_table,"a"
	.align 8
	.quad 10b,.Lbad_source
	.previous
.endm
39
/*
 * dest - tag the next instruction as a store to the destination buffer.
 *
 * Drops numeric local label 20 at the point of use (i.e. on the instruction
 * that follows the macro invocation) and emits an __ex_table entry pairing
 * that address with .Lbad_dest, so a fault on that instruction is routed
 * to the destination-error handler.
 *
 * Emits no instructions into the current section and clobbers nothing.
 */
.macro dest
20:
	.section __ex_table,"a"
	.align 8
	.quad 20b,.Lbad_dest
	.previous
.endm
47
/*
 * ignore - tag the next instruction so that a fault on it is skipped.
 *
 * L: label to continue at if the tagged instruction faults
 *    (defaults to .Lignore).
 *
 * Drops numeric local label 30 at the point of use (i.e. on the instruction
 * that follows the macro invocation) and emits an __ex_table entry pairing
 * that address with \L.
 *
 * Emits no instructions into the current section and clobbers nothing.
 */
.macro ignore L=.Lignore
30:
	.section __ex_table,"a"
	.align 8
	.quad 30b,\L
	.previous
.endm
55
56
/*
 * csum_partial_copy_generic - copy a buffer while accumulating a
 * ones-complement style sum over it.
 *
 * In:   rdi = source, rsi = destination, edx = length in bytes,
 *       ecx = initial 32bit sum,
 *       r8  = where to store -EFAULT on a source fault (skipped if NULL),
 *       r9  = where to store -EFAULT on a destination fault (skipped if NULL)
 * Out:  eax = sum folded to 32 bits; not meaningful after a fault (rax is
 *       reused to hold the error pointer on the exception paths)
 * Clobbers: rcx, rdx, rsi, rdi, r8-r11, flags.
 *       rbx, rbp and r12-r14 are used but saved/restored here.
 *
 * Stack frame (7 quadwords):
 *       0*8(%rsp)  src_err_ptr (from r8)
 *       1*8(%rsp)  dst_err_ptr (from r9)
 *       2*8(%rsp)  saved rbx
 *       3*8(%rsp)  saved r12
 *       4*8(%rsp)  saved r14
 *       5*8(%rsp)  saved r13
 *       6*8(%rsp)  saved rbp
 *
 * The loops keep a carry chain alive in CF across iterations and branch on
 * the ZF produced by dec; only instructions that leave those flags alone
 * (mov, lea, dec for CF, prefetch) may sit between the adc and the jnz.
 * Do not reorder them or replace dec/lea with sub/add.
 */
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC

	/* Label kept because the "ignore" macro names it as its default target. */
.Lignore:
	subq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq	%rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq	%r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq	%r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq	%rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	/* r8/r9 are reused below, so park the error pointers on the stack */
	movq	%r8,(%rsp)
	movq	%r9,1*8(%rsp)

	movl	%ecx,%eax		/* rax = initial sum, zero extended */
	movl	%edx,%ecx		/* rcx = length (rdx becomes a temp) */

	xorl	%r9d,%r9d		/* r9 = 0, used to add in a pending carry */
	movq	%rcx,%r12

	shrq	$6,%r12			/* r12 = number of whole 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc				/* shrq left the shifted-out bit in CF */

	/* main loop. copy and checksum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi),%rbx
	source
	movq	8(%rdi),%r8
	source
	movq	16(%rdi),%r11
	source
	movq	24(%rdi),%rdx

	source
	movq	32(%rdi),%r10
	source
	movq	40(%rdi),%rbp
	source
	movq	48(%rdi),%r14
	source
	movq	56(%rdi),%r13

	/* a fault on the prefetch is simply skipped */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx,%rax
	adcq	%r8,%rax
	adcq	%r11,%rax
	adcq	%rdx,%rax
	adcq	%r10,%rax
	adcq	%rbp,%rax
	adcq	%r14,%rax
	adcq	%r13,%rax

	decl	%r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq	%rbx,(%rsi)
	dest
	movq	%r8,8(%rsi)
	dest
	movq	%r11,16(%rsi)
	dest
	movq	%rdx,24(%rsi)

	dest
	movq	%r10,32(%rsi)
	dest
	movq	%rbp,40(%rsi)
	dest
	movq	%r14,48(%rsi)
	dest
	movq	%r13,56(%rsi)

	leaq	64(%rdi),%rdi		/* lea: advance without touching flags */
	leaq	64(%rsi),%rsi

	jnz	.Lloop

	adcq	%r9,%rax		/* add in the final carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl	%ecx,%r10d		/* r10d = full length, for the 2/1 byte tails */
	andl	$63,%ecx
	shrl	$3,%ecx			/* ecx = remaining whole quadwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi),%rbx
	adcq	%rbx,%rax
	decl	%ecx
	dest
	movq	%rbx,(%rsi)
	leaq	8(%rsi),%rsi		/* preserve carry */
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8
	adcq	%r9,%rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax,%ebx
	shrq	$32,%rax
	addl	%ebx,%eax
	adcl	%r9d,%eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d,%ecx
	andl	$7,%ecx
	shrl	$1,%ecx			/* ecx = remaining whole 16bit words */
	jz	.Lhandle_1
	xorl	%ebx,%ebx		/* upper bits of ebx stay zero for movw */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi),%bx
	adcl	%ebx,%eax
	decl	%ecx
	dest
	movw	%bx,(%rsi)
	leaq	2(%rdi),%rdi
	leaq	2(%rsi),%rsi
	jnz	.Lloop_1
	adcl	%r9d,%eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl	$1,%r10d
	jz	.Lende
	xorl	%ebx,%ebx
	source
	movb	(%rdi),%bl
	dest
	movb	%bl,(%rsi)
	addl	%ebx,%eax
	adcl	%r9d,%eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq	2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq	3*8(%rsp),%r12
	CFI_RESTORE r12
	movq	4*8(%rsp),%r14
	CFI_RESTORE r14
	movq	5*8(%rsp),%r13
	CFI_RESTORE r13
	movq	6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq	$7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp),%rax		/* rax = src_err_ptr; sum is lost here */
	testq	%rax,%rax
	jz	.Lende			/* caller passed no error pointer */
	movl	$-EFAULT,(%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp),%rax		/* rax = dst_err_ptr; sum is lost here */
	testq	%rax,%rax
	jz	.Lende			/* caller passed no error pointer */
	movl	$-EFAULT,(%rax)
	jmp	.Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)