/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 */
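/*
 * Register use matches the C prototype
 *	void *memcpy(void *dest, const void *src, size_t count)
 * under the standard x86-64 calling convention: dest in %rdi, src in
 * %rsi, count in %rdx, and the return value (dest) in %rax.
 */

/*
 * memcpy_c is the simple "rep string" version: count/8 quadwords via
 * rep movsq, then the remaining count%8 bytes via rep movsb.  It is
 * not called directly; the alternatives entry at the end of this file
 * patches a jump to it over the start of memcpy on CPUs that set
 * X86_FEATURE_REP_GOOD.
 */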
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	rep movsq
	movl %edx,%ecx
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)
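/*
 * Generic unrolled version, used when the REP_GOOD alternative is not
 * patched in: copy 64 bytes per loop iteration, then the remaining
 * whole quadwords, then the final 0..7 bytes.
 */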
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	movq %rdi,%rax

	movl %edx,%ecx
	shrl $6,%ecx
	jz .Lhandle_tail
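	/*
	 * Main loop: %ecx holds the number of 64-byte blocks.  Loads
	 * and stores go in pairs through %r8-%r11, presumably so load
	 * latency can overlap with the stores of the previous pair.
	 */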
	.p2align 4
.Lloop_64:
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64
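	/*
	 * Copy the whole quadwords left over from the 64-byte loop,
	 * i.e. (count % 64) / 8 of them.
	 */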
.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8
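	/* Copy the final count % 8 bytes one byte at a time. */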
.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */
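	/*
	 * The replacement below is a two-byte short jump from memcpy to
	 * memcpy_c.  apply_alternatives() copies it over the start of
	 * memcpy at boot when the CPU advertises X86_FEATURE_REP_GOOD,
	 * so such machines take the rep-string path with no runtime
	 * check.
	 */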
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
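	/*
	 * One alternatives table entry, in the layout apply_alternatives()
	 * expects (struct alt_instr): original location, replacement,
	 * required feature bit, length of the original area
	 * (.Lfinal - memcpy) and length of the replacement (2b - 1b).
	 */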
	.quad memcpy
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	.byte .Lfinal - memcpy
	.byte 2b - 1b
	.previous