blob: eb3b6e02242f4b91cffe943b105a04f22f1df249 [file] [log] [blame]
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
9
10 .set noat
11 .set noreorder
12 .text
13
14 .align 4
15 .globl memmove
16 .ent memmove
17memmove:
18 ldgp $29, 0($27)
19 unop
20 nop
21 .prologue 1
22
23 addq $16,$18,$4
24 addq $17,$18,$5
25 cmpule $4,$17,$1 /* dest + n <= src */
26 cmpule $5,$16,$2 /* dest >= src + n */
27
28 bis $1,$2,$1
29 mov $16,$0
30 xor $16,$17,$2
31 bne $1,memcpy !samegp
32
33 and $2,7,$2 /* Test for src/dest co-alignment. */
34 and $16,7,$1
35 cmpule $16,$17,$3
36 bne $3,$memmove_up /* dest < src */
37
38 and $4,7,$1
39 bne $2,$misaligned_dn
40 unop
41 beq $1,$skip_aligned_byte_loop_head_dn
42
43$aligned_byte_loop_head_dn:
44 lda $4,-1($4)
45 lda $5,-1($5)
46 unop
47 ble $18,$egress
48
49 ldq_u $3,0($5)
50 ldq_u $2,0($4)
51 lda $18,-1($18)
52 extbl $3,$5,$1
53
54 insbl $1,$4,$1
55 mskbl $2,$4,$2
56 bis $1,$2,$1
57 and $4,7,$6
58
59 stq_u $1,0($4)
60 bne $6,$aligned_byte_loop_head_dn
61
62$skip_aligned_byte_loop_head_dn:
63 lda $18,-8($18)
64 blt $18,$skip_aligned_word_loop_dn
65
66$aligned_word_loop_dn:
67 ldq $1,-8($5)
68 nop
69 lda $5,-8($5)
70 lda $18,-8($18)
71
72 stq $1,-8($4)
73 nop
74 lda $4,-8($4)
75 bge $18,$aligned_word_loop_dn
76
77$skip_aligned_word_loop_dn:
78 lda $18,8($18)
79 bgt $18,$byte_loop_tail_dn
80 unop
81 ret $31,($26),1
82
83 .align 4
84$misaligned_dn:
85 nop
86 fnop
87 unop
88 beq $18,$egress
89
90$byte_loop_tail_dn:
91 ldq_u $3,-1($5)
92 ldq_u $2,-1($4)
93 lda $5,-1($5)
94 lda $4,-1($4)
95
96 lda $18,-1($18)
97 extbl $3,$5,$1
98 insbl $1,$4,$1
99 mskbl $2,$4,$2
100
101 bis $1,$2,$1
102 stq_u $1,0($4)
103 bgt $18,$byte_loop_tail_dn
104 br $egress
105
106$memmove_up:
107 mov $16,$4
108 mov $17,$5
109 bne $2,$misaligned_up
110 beq $1,$skip_aligned_byte_loop_head_up
111
112$aligned_byte_loop_head_up:
113 unop
114 ble $18,$egress
115 ldq_u $3,0($5)
116 ldq_u $2,0($4)
117
118 lda $18,-1($18)
119 extbl $3,$5,$1
120 insbl $1,$4,$1
121 mskbl $2,$4,$2
122
123 bis $1,$2,$1
124 lda $5,1($5)
125 stq_u $1,0($4)
126 lda $4,1($4)
127
128 and $4,7,$6
129 bne $6,$aligned_byte_loop_head_up
130
131$skip_aligned_byte_loop_head_up:
132 lda $18,-8($18)
133 blt $18,$skip_aligned_word_loop_up
134
135$aligned_word_loop_up:
136 ldq $1,0($5)
137 nop
138 lda $5,8($5)
139 lda $18,-8($18)
140
141 stq $1,0($4)
142 nop
143 lda $4,8($4)
144 bge $18,$aligned_word_loop_up
145
146$skip_aligned_word_loop_up:
147 lda $18,8($18)
148 bgt $18,$byte_loop_tail_up
149 unop
150 ret $31,($26),1
151
152 .align 4
153$misaligned_up:
154 nop
155 fnop
156 unop
157 beq $18,$egress
158
159$byte_loop_tail_up:
160 ldq_u $3,0($5)
161 ldq_u $2,0($4)
162 lda $18,-1($18)
163 extbl $3,$5,$1
164
165 insbl $1,$4,$1
166 mskbl $2,$4,$2
167 bis $1,$2,$1
168 stq_u $1,0($4)
169
170 lda $5,1($5)
171 lda $4,1($4)
172 nop
173 bgt $18,$byte_loop_tail_up
174
175$egress:
176 ret $31,($26),1
177 nop
178 nop
179 nop
180
181 .end memmove