Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
| 3 | * |
| 4 | * This file is subject to the terms and conditions of the GNU General Public |
| 5 | * License. See the file COPYING in the main directory of this archive |
| 6 | * for more details. No warranty for anything given at all. |
| 7 | */ |
Jan Beulich | 8d379da | 2006-09-26 10:52:32 +0200 | [diff] [blame] | 8 | #include <linux/linkage.h> |
| 9 | #include <asm/dwarf2.h> |
| 10 | #include <asm/errno.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 | |
/*
 * Checksum copy with exception handling.
 * On an exception, -EFAULT is stored through src_err_ptr (faulting read)
 * or dst_err_ptr (faulting write) if that pointer is non-NULL. This
 * routine does not zero the destination itself; that is left to the
 * wrappers.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  sum, accumulated in 64 bits and folded to 32 bits.
 *      Undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
| 31 | |
/*
 * source: place immediately before an instruction that reads the source
 * buffer.  Records an __ex_table entry pairing the address of that
 * instruction with the .Lbad_source fixup handler.
 */
	.macro source
10:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 10b
	.quad .Lbad_source
	.popsection
	.endm
| 39 | |
/*
 * dest: place immediately before an instruction that writes the
 * destination buffer.  Records an __ex_table entry pairing the address
 * of that instruction with the .Lbad_dest fixup handler.
 */
	.macro dest
20:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 20b
	.quad .Lbad_dest
	.popsection
	.endm
| 47 | |
/*
 * ignore: place immediately before an instruction whose fault should
 * simply be skipped.  Records an __ex_table entry pairing the address
 * of that instruction with the label given as argument L
 * (default: .Lignore).
 */
	.macro ignore L=.Lignore
30:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 30b
	.quad \L
	.popsection
	.endm
| 55 | |
| 56 | |
/*
 * csum_partial_copy_generic
 *
 * Copy edx bytes from (rdi) to (rsi) while adding them into a running
 * ones'-complement style sum that starts at ecx.
 *
 * In:	rdi source, rsi destination, edx len, ecx initial sum,
 *	r8 src_err_ptr, r9 dst_err_ptr
 * Out:	eax = sum folded to 32 bits (undefined after a fault)
 *
 * Stack frame (7*8 bytes):
 *	0*8 src_err_ptr		1*8 dst_err_ptr
 *	2*8 rbx   3*8 r12   4*8 r14   5*8 r13   6*8 rbp
 *
 * Every load from the source is preceded by "source" and every store to
 * the destination by "dest", so a fault lands in .Lbad_source/.Lbad_dest.
 */
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC

	/* Default fixup label of the "ignore" macro; no code branches here. */
.Lignore:
	subq  $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	/* keep the error pointers for the exception handlers; r8/r9 are reused */
	movq  %r8,(%rsp)
	movq  %r9,1*8(%rsp)

	movl  %ecx,%eax		/* rax = initial sum (zero-extended) */
	movl  %edx,%ecx		/* rcx = length (zero-extended) */

	xorl  %r9d,%r9d		/* r9 = 0, used to add in the carry */
	movq  %rcx,%r12

	shrq  $6,%r12		/* r12 = number of 64 byte blocks */
	jz    .Lhandle_tail	/* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi),%rbx
	source
	movq  8(%rdi),%r8
	source
	movq  16(%rdi),%r11
	source
	movq  24(%rdi),%rdx

	source
	movq  32(%rdi),%r10
	source
	movq  40(%rdi),%rbp
	source
	movq  48(%rdi),%r14
	source
	movq  56(%rdi),%r13

	/* a fault on the prefetch just resumes at 2: */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	/* CF is carried from one iteration into the next */
	adcq  %rbx,%rax
	adcq  %r8,%rax
	adcq  %r11,%rax
	adcq  %rdx,%rax
	adcq  %r10,%rax
	adcq  %rbp,%rax
	adcq  %r14,%rax
	adcq  %r13,%rax

	/*
	 * decl sets ZF for the jnz below and leaves CF alone; the movq and
	 * leaq instructions in between do not modify flags.
	 */
	decl %r12d

	dest
	movq %rbx,(%rsi)
	dest
	movq %r8,8(%rsi)
	dest
	movq %r11,16(%rsi)
	dest
	movq %rdx,24(%rsi)

	dest
	movq %r10,32(%rsi)
	dest
	movq %rbp,40(%rsi)
	dest
	movq %r14,48(%rsi)
	dest
	movq %r13,56(%rsi)

	leaq 64(%rdi),%rdi
	leaq 64(%rsi),%rsi

	jnz   .Lloop

	adcq  %r9,%rax		/* add in carry */

	/* do last up to 56 bytes, 8 bytes at a time */
.Lhandle_tail:
	/* ecx: count */
	movl %ecx,%r10d		/* r10d = full length, needed for the sub-8 tail */
	andl $63,%ecx
	shrl $3,%ecx		/* ecx = remaining whole quadwords */
	jz   .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi),%rbx
	adcq %rbx,%rax
	decl %ecx		/* sets ZF for jnz, preserves CF */
	dest
	movq %rbx,(%rsi)
	leaq 8(%rsi),%rsi	/* preserve carry */
	leaq 8(%rdi),%rdi
	jnz  .Lloop_8
	adcq %r9,%rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax,%ebx
	shrq $32,%rax
	addl %ebx,%eax
	adcl %r9d,%eax

	/* do last up to 6 bytes, 2 bytes at a time */
.Lhandle_7:
	movl %r10d,%ecx
	andl $7,%ecx
	shrl $1,%ecx		/* ecx = remaining whole 16bit words */
	jz   .Lhandle_1
	xorl %ebx,%ebx		/* clear upper bits; movw only writes bx */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi),%bx
	adcl %ebx,%eax
	decl %ecx		/* sets ZF for jnz, preserves CF */
	dest
	movw %bx,(%rsi)
	leaq 2(%rdi),%rdi
	leaq 2(%rsi),%rsi
	jnz .Lloop_1
	adcl %r9d,%eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1,%r10d
	jz    .Lende
	xorl  %ebx,%ebx
	source
	movb (%rdi),%bl
	dest
	movb %bl,(%rsi)
	addl %ebx,%eax
	adcl %r9d,%eax		/* carry */

	CFI_REMEMBER_STATE
	/* common exit: restore callee-saved registers and drop the frame */
.Lende:
	movq 2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp),%r12
	CFI_RESTORE r12
	movq 4*8(%rsp),%r14
	CFI_RESTORE r14
	movq 5*8(%rsp),%r13
	CFI_RESTORE r13
	movq 6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/*
	 * Exception handlers. Very simple, zeroing is done in the wrappers.
	 * Each one stores -EFAULT through the saved error pointer unless it
	 * is NULL.  rax is overwritten with that pointer, so the value
	 * returned in eax after a fault is not a checksum.
	 */
.Lbad_source:
	movq (%rsp),%rax	/* saved src_err_ptr */
	testq %rax,%rax
	jz   .Lende
	movl $-EFAULT,(%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp),%rax	/* saved dst_err_ptr */
	testq %rax,%rax
	jz   .Lende
	movl $-EFAULT,(%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)