/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>

#include <asm/cpufeature.h>
#include <asm/dwarf2.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */

/*
 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
 *
 * Calls to this get patched into the kernel image via the
 * alternative instructions framework:
 */
        ALIGN
memcpy_c:
        CFI_STARTPROC
        movq %rdi, %rax

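        /*
         * Copy count/8 quadwords with REP MOVSQ, then the
         * remaining count%8 tail bytes with REP MOVSB:
         */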
        movl %edx, %ecx
        shrl $3, %ecx
        andl $7, %edx
        rep movsq
        movl %edx, %ecx
        rep movsb
        ret
        CFI_ENDPROC
ENDPROC(memcpy_c)

ENTRY(__memcpy)
ENTRY(memcpy)
        CFI_STARTPROC

        /*
         * Put the number of full 64-byte blocks into %ecx.
         * Tail portion is handled at the end:
         */
        movq %rdi, %rax
        movl %edx, %ecx
        shrl $6, %ecx
        jz .Lhandle_tail

        .p2align 4
.Lloop_64:
        /*
         * We decrement the loop index here - and the zero-flag is
         * checked at the end of the loop (the instructions in between
         * do not change the zero flag):
         */
        decl %ecx

        /*
         * Move in blocks of 4x16 bytes:
         */
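        /*
         * (Both halves of each 16-byte pair are loaded before either
         * is stored, using alternating scratch registers, so the
         * loads and stores can overlap.)
         */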
        movq 0*8(%rsi), %r11
        movq 1*8(%rsi), %r8
        movq %r11, 0*8(%rdi)
        movq %r8, 1*8(%rdi)

        movq 2*8(%rsi), %r9
        movq 3*8(%rsi), %r10
        movq %r9, 2*8(%rdi)
        movq %r10, 3*8(%rdi)

        movq 4*8(%rsi), %r11
        movq 5*8(%rsi), %r8
        movq %r11, 4*8(%rdi)
        movq %r8, 5*8(%rdi)

        movq 6*8(%rsi), %r9
        movq 7*8(%rsi), %r10
        movq %r9, 6*8(%rdi)
        movq %r10, 7*8(%rdi)

        leaq 64(%rsi), %rsi
        leaq 64(%rdi), %rdi

        jnz .Lloop_64

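        /*
         * %edx still holds the original count, so (count & 63) >> 3
         * is the number of trailing quadwords to copy:
         */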
.Lhandle_tail:
        movl %edx, %ecx
        andl $63, %ecx
        shrl $3, %ecx
        jz .Lhandle_7

        .p2align 4
.Lloop_8:
        decl %ecx
        movq (%rsi), %r8
        movq %r8, (%rdi)
        leaq 8(%rdi), %rdi
        leaq 8(%rsi), %rsi
        jnz .Lloop_8

.Lhandle_7:
        movl %edx, %ecx
        andl $7, %ecx
        jz .Lend

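        /*
         * Copy the final 1..7 bytes one at a time:
         */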
        .p2align 4
.Lloop_1:
        movb (%rsi), %r8b
        movb %r8b, (%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz .Lloop_1

.Lend:
        ret
        CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

        /*
         * Some CPUs run faster using the string copy instructions.
         * It is also a lot simpler. Use this when possible:
         */

        .section .altinstr_replacement, "ax"
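        /*
         * Hand-encoded 2-byte short jump to memcpy_c. The disp8 is
         * relative to the end of the jmp instruction; since these two
         * bytes get patched in at memcpy, the distance (memcpy_c -
         * memcpy) is adjusted by the instruction length (2f - 1b):
         */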
1:      .byte 0xeb                              /* jmp <disp8> */
        .byte (memcpy_c - memcpy) - (2f - 1b)   /* offset */
2:
        .previous

        .section .altinstructions, "a"
        .align 8
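        /*
         * One struct alt_instr entry: original instruction address,
         * replacement address, required CPU feature bit, then the
         * lengths of the original and of the replacement:
         */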
        .quad memcpy
        .quad 1b
        .byte X86_FEATURE_REP_GOOD

        /*
         * Replace only the beginning: memcpy itself is used to apply
         * the alternatives, so it would be silly to have it overwrite
         * itself with NOPs - a reboot would be the only outcome...
         */
        .byte 2b - 1b
        .byte 2b - 1b
        .previous