blob: 9c5965273428bc31aaca2815efc5442e079a9871 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/* memcpy.S: optimised assembly memcpy
2 *
3 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12
13 .text
14 .p2align 4
15
16###############################################################################
17#
18# void *memcpy(void *to, const char *from, size_t count)
19#
20# - NOTE: must not use any stack. exception detection performs function return
21# to caller's fixup routine, aborting the remainder of the copy
22#
# - register usage, as read from the code below:
#     GR8     - to; never written here, so it still holds the destination
#               when the function returns
#     GR9     - from; becomes the running load pointer
#     GR10    - count; counts down to zero by the transfer size
#     GR3     - running store pointer
#     GR11    - step in bytes, the index register of every load and store
#     GR4-GR7 - data in flight (GR4 is also scratch for the alignment test)
# - the transfer size (16, 8, 4, 2 or 1 bytes) is the largest one that divides
#   to, from and count alike, so every loop ends exactly when GR10 reaches zero
# - both pointers are started one step low because the update-form loads and
#   stores add GR11 to the base register as part of each access
#
23###############################################################################
24 .globl memcpy,__memcpy_end
25 .type memcpy,@function
26memcpy:
27 or.p gr8,gr9,gr4 ; GR4 = to | from
28 orcc gr10,gr0,gr0,icc3 ; test count into icc3, result discarded in GR0
29 or.p gr10,gr4,gr4 ; GR4 = to | from | count
30 beqlr icc3,#0 ; count == 0: return straight away
31
32 # optimise based on best common alignment for to, from & count
33 andicc.p gr4,#0x0f,gr0,icc0 ; icc0 <- low 4 bits of GR4 clear?
34 setlos #8,gr11 ; step 8, used by both memcpy_16 and memcpy_8
35 andicc.p gr4,#0x07,gr0,icc1 ; icc1 <- low 3 bits of GR4 clear?
36 beq icc0,#0,memcpy_16
37 andicc.p gr4,#0x03,gr0,icc0 ; icc0 <- low 2 bits of GR4 clear?
38 beq icc1,#0,memcpy_8
39 andicc.p gr4,#0x01,gr0,icc1 ; icc1 <- low bit of GR4 clear?
40 beq icc0,#0,memcpy_4
41 setlos.p #1,gr11 ; step 1 for the byte loop below
42 beq icc1,#0,memcpy_2
43
44 # do byte by byte copy
45 sub.p gr8,gr11,gr3 ; GR3 = to - step
46 sub gr9,gr11,gr9 ; GR9 = from - step
470: ldubu.p @(gr9,gr11),gr4 ; load one byte, advancing GR9
48 subicc gr10,#1,gr10,icc0 ; count -= 1
49 stbu.p gr4,@(gr3,gr11) ; store it, advancing GR3
50 bne icc0,#2,0b ; loop until count reaches zero
51 bralr ; return via LR
52
53 # do halfword by halfword copy
54memcpy_2:
55 setlos #2,gr11 ; step 2
56 sub.p gr8,gr11,gr3 ; GR3 = to - step
57 sub gr9,gr11,gr9 ; GR9 = from - step
580: lduhu.p @(gr9,gr11),gr4 ; load one halfword, advancing GR9
59 subicc gr10,#2,gr10,icc0 ; count -= 2
60 sthu.p gr4,@(gr3,gr11) ; store it, advancing GR3
61 bne icc0,#2,0b ; loop until count reaches zero
62 bralr ; return via LR
63
64 # do word by word copy
65memcpy_4:
66 setlos #4,gr11 ; step 4
67 sub.p gr8,gr11,gr3 ; GR3 = to - step
68 sub gr9,gr11,gr9 ; GR9 = from - step
690: ldu.p @(gr9,gr11),gr4 ; load one word, advancing GR9
70 subicc gr10,#4,gr10,icc0 ; count -= 4
71 stu.p gr4,@(gr3,gr11) ; store it, advancing GR3
72 bne icc0,#2,0b ; loop until count reaches zero
73 bralr ; return via LR
74
75 # do double-word by double-word copy
 # (GR11 is still 8 from the alignment dispatch above)
76memcpy_8:
77 sub.p gr8,gr11,gr3 ; GR3 = to - step
78 sub gr9,gr11,gr9 ; GR9 = from - step
790: lddu.p @(gr9,gr11),gr4 ; load 8 bytes starting at GR4, advancing GR9
80 subicc gr10,#8,gr10,icc0 ; count -= 8
81 stdu.p gr4,@(gr3,gr11) ; store them, advancing GR3
82 bne icc0,#2,0b ; loop until count reaches zero
83 bralr ; return via LR
84
85 # do quad-word by quad-word copy
 # (two 8-byte transfers per pass; GR11 is still 8 from the dispatch above)
86memcpy_16:
87 sub.p gr8,gr11,gr3 ; GR3 = to - step
88 sub gr9,gr11,gr9 ; GR9 = from - step
890: lddu @(gr9,gr11),gr4 ; first 8 bytes, starting at GR4
90 lddu.p @(gr9,gr11),gr6 ; second 8 bytes, starting at GR6
91 subicc gr10,#16,gr10,icc0 ; count -= 16
92 stdu gr4,@(gr3,gr11) ; store first 8 bytes
93 stdu.p gr6,@(gr3,gr11) ; store second 8 bytes
94 bne icc0,#2,0b ; loop until count reaches zero
95 bralr ; return via LR
 # exported end marker, also used for .size below; presumably lets the
 # exception code test whether a faulting PC lies inside memcpy - confirm
 # against the trap handler, which is not part of this file
96__memcpy_end:
97
98 .size memcpy, __memcpy_end-memcpy
99
100###############################################################################
101#
102# copy to/from userspace
103# - return the number of bytes that could not be copied (0 on complete success)
104#
105# long __memcpy_user(void *dst, const void *src, size_t count)
106#
# - an 8-byte stack frame holds dst+count at offset 0 and the caller's return
#   address at offset 4 while memcpy runs; both exit paths below release it
# - __memcpy_user_error_lr and __memcpy_user_error_handler are exported;
#   presumably the exception code uses them to recognise a fault raised by
#   this particular call to memcpy and to resume at the handler - confirm
#   against the trap handling code, which is not part of this file
#
107###############################################################################
108 .globl __memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler
109 .type __memcpy_user,@function
110__memcpy_user:
111 movsg lr,gr7 ; GR7 = caller's return address
112 subi.p sp,#8,sp ; make room for the GR6/GR7 pair
113 add gr8,gr10,gr6 ; calculate expected end address
114 stdi gr6,@(sp,#0) ; [sp+0] = dst+count, [sp+4] = return address
115
116 # abuse memcpy to do the dirty work
117 call memcpy
118__memcpy_user_error_lr:
 # memcpy ran to completion: report that nothing was left uncopied
119 ldi.p @(sp,#4),gr7 ; reload the return address (LR was overwritten by the call)
120 setlos #0,gr8 ; return value 0
121 jmpl.p @(gr7,gr0) ; return to the caller; .p pairs this jump with the addi below
122 addi sp,#8,sp ; release the frame
123
124 # deal with any exception generated by memcpy
125 # GR3 - memcpy's running dest pointer, one step behind the next store
126 # GR11 - memcpy's step value (index register for store insns)
127__memcpy_user_error_handler:
128 lddi.p @(sp,#0),gr4 ; load GR4 with dst+count, GR5 with ret addr
129 add gr11,gr3,gr7 ; GR7 = GR3 + step: address the next store would use
130 sub.p gr4,gr7,gr8 ; return value = dst+count minus that address
131
132 addi sp,#8,sp ; release the frame
133 jmpl @(gr5,gr0) ; return to the caller of __memcpy_user
134
135 .size __memcpy_user, .-__memcpy_user