/* Copyright 2002 Andi Kleen */

#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *	rdi destination
 *	rsi source
 *	rdx count
 *
 * Output:
 *	rax original destination
 */
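
/*
 * The arguments arrive in %rdi/%rsi/%rdx per the x86-64 C calling
 * convention, so this is directly callable from C as memcpy(). A
 * minimal call sketch from assembly (illustration only; dst, src and
 * len are placeholder symbols, not defined in this file):
 *
 *	leaq	dst(%rip),%rdi		# destination
 *	leaq	src(%rip),%rsi		# source
 *	movq	$len,%rdx		# byte count
 *	call	memcpy			# %rax = original destination
 */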
19
Jan Beulich8d379da2006-09-26 10:52:32 +020020 ALIGN
21memcpy_c:
22 CFI_STARTPROC
23 movq %rdi,%rax
24 movl %edx,%ecx
25 shrl $3,%ecx
26 andl $7,%edx
27 rep movsq
28 movl %edx,%ecx
29 rep movsb
30 ret
31 CFI_ENDPROC
32ENDPROC(memcpy_c)
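
/*
 * Illustrative arithmetic for the split above (not in the original):
 * for count = 100, rep movsq copies 100 >> 3 = 12 quadwords (96
 * bytes) and rep movsb copies the remaining 100 & 7 = 4 bytes.
 */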

ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	movq %rdi,%rax		/* return the original destination */

	movl %edx,%ecx
	shrl $6,%ecx		/* number of 64-byte blocks */
	jz .Lhandle_tail

	/*
	 * Main loop: copy 64 bytes per iteration with eight quadword
	 * moves, alternating between two register pairs so that loads
	 * and stores can overlap.
	 */
	.p2align 4
.Lloop_64:
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

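/*
 * Illustrative arithmetic (not in the original): for count = 200,
 * 200 >> 6 = 3 loop iterations copy 192 bytes; the tail code below
 * then copies (200 & 63) >> 3 = 1 quadword and 200 & 7 = 0 bytes.
 */
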
.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx		/* bytes left over after the 64-byte blocks */
	shrl $3,%ecx		/* remaining quadwords */
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* trailing bytes */
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:	/* end-of-function marker, used as the patch length below */
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible. */

	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad memcpy				/* original instructions */
	.quad 1b				/* replacement */
	.byte X86_FEATURE_REP_GOOD		/* required CPU feature */
	.byte .Lfinal - memcpy			/* length of original */
	.byte 2b - 1b				/* length of replacement */
	.previous
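
/*
 * How the patching works (summary, not from the original source): at
 * boot, apply_alternatives() checks X86_FEATURE_REP_GOOD and, when it
 * is set, copies the two-byte replacement above over the start of
 * memcpy (padding the rest of the original body with nops), turning
 * memcpy into a short jump to memcpy_c. The jmp <disp8> displacement
 * is relative to the end of the jmp instruction itself, hence:
 *
 *	rel8 = memcpy_c - (memcpy + (2f - 1b))
 *	     = (memcpy_c - memcpy) - (2f - 1b)
 */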