blob: 5196762b3b0ea3017cbb43e97f253c68055a1a7f [file] [log] [blame]
Andi Kleen0812a572007-02-13 13:26:19 +01001/* Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2.
3 *
4 * Functions to copy from and to user space.
5 */
6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
10#define FIX_ALIGNMENT 1
11
12#include <asm/current.h>
13#include <asm/asm-offsets.h>
14#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
17/*
18 * copy_user_nocache - Uncached memory copy with exception handling
19 * This will force destination/source out of cache for more performance.
20 *
21 * Input:
22 * rdi destination
23 * rsi source
24 * rdx count
25 * rcx zero flag when 1 zero on exception
26 *
27 * Output:
28 * eax uncopied bytes or 0 if successful.
29 */
30ENTRY(__copy_user_nocache)
31 CFI_STARTPROC
32 pushq %rbx
33 CFI_ADJUST_CFA_OFFSET 8
34 CFI_REL_OFFSET rbx, 0
35 pushq %rcx /* save zero flag */
36 CFI_ADJUST_CFA_OFFSET 8
37 CFI_REL_OFFSET rcx, 0
38
39 xorl %eax,%eax /* zero for the exception handler */
40
41#ifdef FIX_ALIGNMENT
42 /* check for bad alignment of destination */
43 movl %edi,%ecx
44 andl $7,%ecx
45 jnz .Lbad_alignment
46.Lafter_bad_alignment:
47#endif
48
49 movq %rdx,%rcx
50
51 movl $64,%ebx
52 shrq $6,%rdx
53 decq %rdx
54 js .Lhandle_tail
55
56 .p2align 4
57.Lloop:
58.Ls1: movq (%rsi),%r11
59.Ls2: movq 1*8(%rsi),%r8
60.Ls3: movq 2*8(%rsi),%r9
61.Ls4: movq 3*8(%rsi),%r10
62.Ld1: movnti %r11,(%rdi)
63.Ld2: movnti %r8,1*8(%rdi)
64.Ld3: movnti %r9,2*8(%rdi)
65.Ld4: movnti %r10,3*8(%rdi)
66
67.Ls5: movq 4*8(%rsi),%r11
68.Ls6: movq 5*8(%rsi),%r8
69.Ls7: movq 6*8(%rsi),%r9
70.Ls8: movq 7*8(%rsi),%r10
71.Ld5: movnti %r11,4*8(%rdi)
72.Ld6: movnti %r8,5*8(%rdi)
73.Ld7: movnti %r9,6*8(%rdi)
74.Ld8: movnti %r10,7*8(%rdi)
75
76 dec %rdx
77
78 leaq 64(%rsi),%rsi
79 leaq 64(%rdi),%rdi
80
81 jns .Lloop
82
83 .p2align 4
84.Lhandle_tail:
85 movl %ecx,%edx
86 andl $63,%ecx
87 shrl $3,%ecx
88 jz .Lhandle_7
89 movl $8,%ebx
90 .p2align 4
91.Lloop_8:
92.Ls9: movq (%rsi),%r8
93.Ld9: movnti %r8,(%rdi)
94 decl %ecx
95 leaq 8(%rdi),%rdi
96 leaq 8(%rsi),%rsi
97 jnz .Lloop_8
98
99.Lhandle_7:
100 movl %edx,%ecx
101 andl $7,%ecx
102 jz .Lende
103 .p2align 4
104.Lloop_1:
105.Ls10: movb (%rsi),%bl
106.Ld10: movb %bl,(%rdi)
107 incq %rdi
108 incq %rsi
109 decl %ecx
110 jnz .Lloop_1
111
112 CFI_REMEMBER_STATE
113.Lende:
114 popq %rcx
115 CFI_ADJUST_CFA_OFFSET -8
116 CFI_RESTORE %rcx
117 popq %rbx
118 CFI_ADJUST_CFA_OFFSET -8
119 CFI_RESTORE rbx
Nick Piggindf1bdc02007-10-13 03:06:00 +0200120 sfence
Andi Kleen0812a572007-02-13 13:26:19 +0100121 ret
122 CFI_RESTORE_STATE
123
124#ifdef FIX_ALIGNMENT
125 /* align destination */
126 .p2align 4
127.Lbad_alignment:
128 movl $8,%r9d
129 subl %ecx,%r9d
130 movl %r9d,%ecx
131 cmpq %r9,%rdx
132 jz .Lhandle_7
133 js .Lhandle_7
134.Lalign_1:
135.Ls11: movb (%rsi),%bl
136.Ld11: movb %bl,(%rdi)
137 incq %rsi
138 incq %rdi
139 decl %ecx
140 jnz .Lalign_1
141 subq %r9,%rdx
142 jmp .Lafter_bad_alignment
143#endif
144
145 /* table sorted by exception address */
146 .section __ex_table,"a"
147 .align 8
148 .quad .Ls1,.Ls1e
149 .quad .Ls2,.Ls2e
150 .quad .Ls3,.Ls3e
151 .quad .Ls4,.Ls4e
152 .quad .Ld1,.Ls1e
153 .quad .Ld2,.Ls2e
154 .quad .Ld3,.Ls3e
155 .quad .Ld4,.Ls4e
156 .quad .Ls5,.Ls5e
157 .quad .Ls6,.Ls6e
158 .quad .Ls7,.Ls7e
159 .quad .Ls8,.Ls8e
160 .quad .Ld5,.Ls5e
161 .quad .Ld6,.Ls6e
162 .quad .Ld7,.Ls7e
163 .quad .Ld8,.Ls8e
164 .quad .Ls9,.Le_quad
165 .quad .Ld9,.Le_quad
166 .quad .Ls10,.Le_byte
167 .quad .Ld10,.Le_byte
168#ifdef FIX_ALIGNMENT
169 .quad .Ls11,.Lzero_rest
170 .quad .Ld11,.Lzero_rest
171#endif
172 .quad .Le5,.Le_zero
173 .previous
174
175 /* compute 64-offset for main loop. 8 bytes accuracy with error on the
176 pessimistic side. this is gross. it would be better to fix the
177 interface. */
178 /* eax: zero, ebx: 64 */
179.Ls1e: addl $8,%eax
180.Ls2e: addl $8,%eax
181.Ls3e: addl $8,%eax
182.Ls4e: addl $8,%eax
183.Ls5e: addl $8,%eax
184.Ls6e: addl $8,%eax
185.Ls7e: addl $8,%eax
186.Ls8e: addl $8,%eax
187 addq %rbx,%rdi /* +64 */
188 subq %rax,%rdi /* correct destination with computed offset */
189
190 shlq $6,%rdx /* loop counter * 64 (stride length) */
191 addq %rax,%rdx /* add offset to loopcnt */
192 andl $63,%ecx /* remaining bytes */
193 addq %rcx,%rdx /* add them */
194 jmp .Lzero_rest
195
196 /* exception on quad word loop in tail handling */
197 /* ecx: loopcnt/8, %edx: length, rdi: correct */
198.Le_quad:
199 shll $3,%ecx
200 andl $7,%edx
201 addl %ecx,%edx
202 /* edx: bytes to zero, rdi: dest, eax:zero */
203.Lzero_rest:
204 cmpl $0,(%rsp) /* zero flag set? */
205 jz .Le_zero
206 movq %rdx,%rcx
207.Le_byte:
208 xorl %eax,%eax
209.Le5: rep
210 stosb
211 /* when there is another exception while zeroing the rest just return */
212.Le_zero:
213 movq %rdx,%rax
214 jmp .Lende
215 CFI_ENDPROC
216ENDPROC(__copy_user_nocache)
217
218