/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
ENTRY(memmove)
	CFI_STARTPROC

	/*
	 * Register roles throughout:
	 *   rdi = dest, rsi = src, rdx = count (live, updated as we copy)
	 *   rax = return value (dest, set once below and never touched again)
	 *   r8-r11 = scratch
	 */

	/* Counts of 32 bytes or more take the unrolled loops below;
	 * anything smaller goes straight to the small-size tail at 1:. */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb	1f

	/*
	 * Decide forward/backward copy mode.
	 * Forward copy is safe when src >= dest, or when the regions do not
	 * overlap (src + count <= dest); only an overlapping src < dest
	 * requires the backward copy at 2:.
	 *
	 * NOTE(review): jge/jg are *signed* compares on pointer values; this
	 * is fine as long as src and dest cannot straddle the signed address
	 * boundary -- confirm against the kernel's address-space layout.
	 */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

	/* The forward-copy body between .Lmemmove_begin_forward and
	 * .Lmemmove_end_forward is runtime-patched to a plain "rep movsb"
	 * on CPUs with enhanced rep movsb (ERMS) -- see the
	 * .altinstructions entries at the bottom of this file. */
.Lmemmove_begin_forward:
	/*
	 * The movsq instruction has a high startup latency, so we handle
	 * small sizes with general-purpose registers and only use rep movsq
	 * for large copies.  680 is the empirically chosen crossover point
	 * (tuning constant from the original author -- not derived here).
	 */
	cmp $680, %rdx
	jb	3f
	/*
	 * rep movsq only performs well in the aligned case.  Equal low
	 * address bytes mean (dest - src) is a multiple of 256, so src and
	 * dest share the same alignment mod 8 -- a cheap heuristic; movsq
	 * is still *correct* for any alignment, just slower.
	 */

	cmpb %dil, %sil
	je 4f
3:
	/* Bias the count down by one chunk before entering the loop ... */
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 *
	 * ... and subtract another chunk at the top of every iteration.
	 * movq and leaq do not modify flags, so the "jae 5b" below still
	 * tests this sub: the loop body runs once more after the biased
	 * count goes negative (CF set), which together with the initial
	 * bias copies exactly floor(count/32) chunks.  The addq afterwards
	 * restores rdx to count mod 32 for the tail handler at 1:.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b				/* flags still from "sub" above */
	addq $0x20, %rdx		/* rdx = remaining 0..31 bytes */
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 *
	 * Read the (possibly partial) last qword of src into r11 *before*
	 * rep movsq runs, since a forward overlapped copy may overwrite it;
	 * r10 keeps the matching dest address.  rep movsq moves count/8
	 * qwords, then the saved qword is stored last as an overlapping
	 * write that covers the final count%8 bytes.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 *
	 * Mirror of 4: -- save the *first* src qword (which a backward
	 * overlapped copy may clobber), point rsi/rdi at the last qword of
	 * each region, and copy count/8 qwords with the direction flag set.
	 * DF is restored with cld immediately: the kernel (and the ABI)
	 * assume DF=0 everywhere else.  The saved head qword is stored last
	 * to cover the leading count%8 bytes.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for backward copy.
	 * Same large-copy threshold and mutual-alignment heuristic as the
	 * forward path; aligned copies of 680+ bytes use backward movsq (7:).
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate copy position to tail: move both pointers one byte past
	 * the end of their buffers so the loop can index backward from them.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 * Same biased-count trick as the forward loop at 5: (see comment
	 * there): the jae tests the sub at the loop top, copying exactly
	 * floor(count/32) chunks from the tail end downward.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate copy position to head: restore rdx to the residual
	 * count and rewind both pointers to the start of the uncopied
	 * region, so the shared tail code at 1: copies forward as usual
	 * (safe -- the remaining regions no longer overlap hazardously).
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
1:
	/*
	 * Small-size tail: each bucket below copies the first and last
	 * pieces of the remaining range; for in-between lengths the two
	 * pairs overlap harmlessly, covering every byte with straight-line
	 * loads done before any store (overlap-safe in both directions).
	 */
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC

	/*
	 * ERMS replacement body: on CPUs advertising enhanced rep movsb,
	 * microcode makes "rep movsb" the fastest forward copy, so the
	 * alternatives machinery patches it over the forward path above.
	 * Backward copies always keep the open-coded path (rep movsb only
	 * helps the forward direction).
	 */
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	/*
	 * One alt_instr record: original location, replacement location,
	 * required CPU feature bit, original length, replacement length
	 * (layout must match struct alt_instr in <asm/alternative.h>).
	 */
	.section .altinstructions,"a"
	.align 8
	.quad .Lmemmove_begin_forward
	.quad .Lmemmove_begin_forward_efs
	.word X86_FEATURE_ERMS
	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
ENDPROC(memmove)