/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * We build a jump to memcpy_orig by default, which gets NOPped out on
 * the majority of x86 CPUs, which set REP_GOOD. On CPUs that also have
 * the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs are changed
 * into a jmp to memcpy_erms, which does the copy with REP MOVSB.
 */
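
/*
 * A condensed sketch of the patching decision described above
 * (pseudocode for illustration only; the real work is done by the
 * alternatives machinery at boot):
 *
 *      if (boot_cpu_has(X86_FEATURE_ERMS))
 *              first_insn = "jmp memcpy_erms";
 *      else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *              first_insn = "";        // NOPs: fall through to rep movsq
 *      else
 *              first_insn = "jmp memcpy_orig"; // open-coded copy below
 */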

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
ENTRY(__memcpy)
ENTRY(memcpy)
        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
                      "jmp memcpy_erms", X86_FEATURE_ERMS

        movq %rdi, %rax
        movq %rdx, %rcx
        shrq $3, %rcx
        andl $7, %edx
        rep movsq
        movl %edx, %ecx
        rep movsb
        ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
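
/*
 * A minimal C sketch of the REP_GOOD path above (illustration only;
 * the helper name is invented and this is not part of the kernel
 * build):
 *
 *      void *memcpy_repgood_sketch(void *dst, const void *src, size_t len)
 *      {
 *              unsigned long *d = dst;
 *              const unsigned long *s = src;
 *              size_t qwords = len >> 3;       // shrq $3, %rcx
 *              size_t tail = len & 7;          // andl $7, %edx
 *              unsigned char *db;
 *              const unsigned char *sb;
 *
 *              while (qwords--)                // rep movsq
 *                      *d++ = *s++;
 *              db = (unsigned char *)d;
 *              sb = (const unsigned char *)s;
 *              while (tail--)                  // rep movsb
 *                      *db++ = *sb++;
 *              return dst;                     // %rax = original destination
 *      }
 */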

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
        movq %rdi, %rax
        movq %rdx, %rcx
        rep movsb
        ret
ENDPROC(memcpy_erms)
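
/*
 * For comparison, a hedged sketch of invoking the same ERMS-style copy
 * from C with inline assembly (illustration only; the helper name is
 * invented and the kernel does not use this here):
 *
 *      static inline void rep_movsb_sketch(void *dst, const void *src,
 *                                          size_t len)
 *      {
 *              asm volatile("rep movsb"
 *                           : "+D" (dst), "+S" (src), "+c" (len)
 *                           : : "memory");
 *      }
 */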

ENTRY(memcpy_orig)
        movq %rdi, %rax

        cmpq $0x20, %rdx
        jb .Lhandle_tail

        /*
         * Check whether a memory false dependence could occur,
         * then jump to the corresponding copy mode.
         */
        cmp %dil, %sil
        jl .Lcopy_backward
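        /*
         * Note (informal): only the low address bytes are compared;
         * this is a cheap heuristic for the case where the source sits
         * just below the destination, where the forward loop's loads
         * could be falsely flagged as depending on its own recent
         * stores to nearby destination bytes, so the backward copy is
         * used instead.
         */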
        subq $0x20, %rdx
.Lcopy_forward_loop:
        subq $0x20, %rdx

        /*
         * Move in blocks of 4x8 bytes:
         */
        movq 0*8(%rsi), %r8
        movq 1*8(%rsi), %r9
        movq 2*8(%rsi), %r10
        movq 3*8(%rsi), %r11
        leaq 4*8(%rsi), %rsi

        movq %r8, 0*8(%rdi)
        movq %r9, 1*8(%rdi)
        movq %r10, 2*8(%rdi)
        movq %r11, 3*8(%rdi)
        leaq 4*8(%rdi), %rdi
        jae .Lcopy_forward_loop
        addl $0x20, %edx
        jmp .Lhandle_tail

.Lcopy_backward:
        /*
         * Calculate the copy position at the tail.
         */
        addq %rdx, %rsi
        addq %rdx, %rdi
        subq $0x20, %rdx
        /*
         * At most 3 ALU operations can issue per cycle,
         * so append NOPs within the same 16-byte chunk.
         */
        .p2align 4
.Lcopy_backward_loop:
        subq $0x20, %rdx
        movq -1*8(%rsi), %r8
        movq -2*8(%rsi), %r9
        movq -3*8(%rsi), %r10
        movq -4*8(%rsi), %r11
        leaq -4*8(%rsi), %rsi
        movq %r8, -1*8(%rdi)
        movq %r9, -2*8(%rdi)
        movq %r10, -3*8(%rdi)
        movq %r11, -4*8(%rdi)
        leaq -4*8(%rdi), %rdi
        jae .Lcopy_backward_loop

        /*
         * Calculate the copy position back at the head.
         */
        addl $0x20, %edx
        subq %rdx, %rsi
        subq %rdx, %rdi
.Lhandle_tail:
        cmpl $16, %edx
        jb .Lless_16bytes

        /*
         * Move 16 to 31 bytes of data.
         */
        movq 0*8(%rsi), %r8
        movq 1*8(%rsi), %r9
        movq -2*8(%rsi, %rdx), %r10
        movq -1*8(%rsi, %rdx), %r11
        movq %r8, 0*8(%rdi)
        movq %r9, 1*8(%rdi)
        movq %r10, -2*8(%rdi, %rdx)
        movq %r11, -1*8(%rdi, %rdx)
        retq
        .p2align 4
.Lless_16bytes:
        cmpl $8, %edx
        jb .Lless_8bytes
        /*
         * Move 8 to 15 bytes of data.
         */
        movq 0*8(%rsi), %r8
        movq -1*8(%rsi, %rdx), %r9
        movq %r8, 0*8(%rdi)
        movq %r9, -1*8(%rdi, %rdx)
        retq
        .p2align 4
.Lless_8bytes:
        cmpl $4, %edx
        jb .Lless_3bytes

        /*
         * Move 4 to 7 bytes of data.
         */
        movl (%rsi), %ecx
        movl -4(%rsi, %rdx), %r8d
        movl %ecx, (%rdi)
        movl %r8d, -4(%rdi, %rdx)
        retq
        .p2align 4
.Lless_3bytes:
        subl $1, %edx
        jb .Lend
        /*
         * Move 1 to 3 bytes of data.
         */
        movzbl (%rsi), %ecx
        jz .Lstore_1byte
        movzbq 1(%rsi), %r8
        movzbq (%rsi, %rdx), %r9
        movb %r8b, 1(%rdi)
        movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
        movb %cl, (%rdi)

.Lend:
        retq
ENDPROC(memcpy_orig)
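
/*
 * The tail handling above copies a 0..31 byte remainder with a few
 * possibly overlapping accesses taken from both ends of the region
 * instead of a byte loop. A minimal C sketch of that trick for the
 * 8..15 byte case (illustration only; the helper name is invented):
 *
 *      #include <string.h>
 *      #include <stdint.h>
 *
 *      static void copy_8_to_15_sketch(void *dst, const void *src, size_t len)
 *      {
 *              uint64_t head, tail;    // caller guarantees 8 <= len <= 15
 *
 *              memcpy(&head, src, 8);                          // movq 0*8(%rsi), %r8
 *              memcpy(&tail, (const char *)src + len - 8, 8);  // movq -1*8(%rsi,%rdx), %r9
 *              memcpy(dst, &head, 8);                          // movq %r8, 0*8(%rdi)
 *              memcpy((char *)dst + len - 8, &tail, 8);        // movq %r9, -1*8(%rdi,%rdx)
 *      }
 */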

#ifndef CONFIG_UML
/*
 * memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to target are posted and don't generate machine checks.
 */
ENTRY(memcpy_mcsafe)
        cmpl $8, %edx
        /* Less than 8 bytes? Go to byte copy loop */
        jb .L_no_whole_words

        /* Check for bad alignment of source */
        testl $7, %esi
        /* Already aligned */
        jz .L_8byte_aligned

        /* Copy one byte at a time until source is 8-byte aligned */
        movl %esi, %ecx
        andl $7, %ecx
        subl $8, %ecx
        negl %ecx
        subl %ecx, %edx
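        /*
         * The source is known to be misaligned here, so %esi & 7 is in
         * the range 1..7 and the sequence above leaves
         * %ecx = 8 - (%esi & 7), the number of leading bytes needed to
         * reach 8-byte alignment; that count has already been
         * subtracted from the total in %edx.
         */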
.L_copy_leading_bytes:
        movb (%rsi), %al
        movb %al, (%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_copy_leading_bytes

.L_8byte_aligned:
        /* Figure out how many whole cache lines (64 bytes) to copy */
        movl %edx, %ecx
        andl $63, %edx
        shrl $6, %ecx
        jz .L_no_whole_cache_lines

        /* Loop copying whole cache lines */
.L_cache_w0: movq (%rsi), %r8
.L_cache_w1: movq 1*8(%rsi), %r9
.L_cache_w2: movq 2*8(%rsi), %r10
.L_cache_w3: movq 3*8(%rsi), %r11
        movq %r8, (%rdi)
        movq %r9, 1*8(%rdi)
        movq %r10, 2*8(%rdi)
        movq %r11, 3*8(%rdi)
.L_cache_w4: movq 4*8(%rsi), %r8
.L_cache_w5: movq 5*8(%rsi), %r9
.L_cache_w6: movq 6*8(%rsi), %r10
.L_cache_w7: movq 7*8(%rsi), %r11
        movq %r8, 4*8(%rdi)
        movq %r9, 5*8(%rdi)
        movq %r10, 6*8(%rdi)
        movq %r11, 7*8(%rdi)
        leaq 64(%rsi), %rsi
        leaq 64(%rdi), %rdi
        decl %ecx
        jnz .L_cache_w0
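
        /*
         * Each load above carries its own .L_cache_wN label so that
         * the _ASM_EXTABLE_FAULT() entries at the end of this file can
         * steer a machine check taken on that read to the -EFAULT
         * fixup; the stores need no entries since, as the header
         * comment notes, they are posted and don't generate machine
         * checks.
         */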

        /* Are there any trailing 8-byte words? */
.L_no_whole_cache_lines:
        movl %edx, %ecx
        andl $7, %edx
        shrl $3, %ecx
        jz .L_no_whole_words

        /* Copy trailing words */
.L_copy_trailing_words:
        movq (%rsi), %r8
        movq %r8, (%rdi)
        leaq 8(%rsi), %rsi
        leaq 8(%rdi), %rdi
        decl %ecx
        jnz .L_copy_trailing_words

        /* Any trailing bytes? */
.L_no_whole_words:
        andl %edx, %edx
        jz .L_done_memcpy_trap

        /* Copy trailing bytes */
        movl %edx, %ecx
.L_copy_trailing_bytes:
        movb (%rsi), %al
        movb %al, (%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_copy_trailing_bytes

        /* Copy successful. Return zero */
.L_done_memcpy_trap:
        xorq %rax, %rax
        ret
ENDPROC(memcpy_mcsafe)
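
/*
 * Caller's-eye sketch (illustration only; the buffer names are
 * invented): memcpy_mcsafe() returns 0 on success and -EFAULT if a
 * machine check was taken while reading the source, so a caller can
 * back out cleanly instead of consuming poisoned data:
 *
 *      int rc = memcpy_mcsafe(dst_buf, possibly_poisoned_src, len);
 *
 *      if (rc)
 *              return rc;      // -EFAULT: uncorrectable error in source
 */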

        .section .fixup, "ax"
        /* Return -EFAULT for any failure */
.L_memcpy_mcsafe_fail:
        mov $-EFAULT, %rax
        ret

        .previous

        _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
        _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
#endif