/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

/*
 * Endian-dependent helpers used by the unaligned-source copy loops.
 *
 * Those loops keep a "carry" register holding source bytes that were loaded
 * but not yet stored. Every macro takes (destination, source, immediate):
 *
 *   SHIFT_1    moves a freshly loaded word so it can be OR-ed with the carry.
 *   SHIFT_2    leaves the bytes of that word that become the next carry.
 *   MERGE_1/2  line up the initial halfword and byte before they are OR-ed
 *              into the first carry (MERGE_2 expands to nothing on
 *              little-endian).
 *   EXTRACT_1  isolates the trailing halfword of the carry.
 *   EXTRACT_2  isolates the trailing byte of the carry.
 *
 * The little-endian EXTRACT_1 and the big-endian EXTRACT_2 ignore IMM and use
 * a fixed mask / shift amount instead.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Building blocks of the aligned bulk-copy loop.
 *
 *   LOADX / STOREX   one post-incrementing load / store: 8 bytes (ldd/std)
 *                    when CONFIG_ARC_HAS_LL64 is set, 4 bytes (ld/st)
 *                    otherwise.
 *   ZOLSHFT          log2 of the bytes moved by four LOADX/STOREX pairs
 *                    (32 or 16); turns a byte count into a loop count.
 *   ZOLAND           mask giving the bytes left over after those loops.
 *   PREFETCH_READ /  prefetch / prefetchw at a fixed offset ahead of RX.
 *   PREFETCH_WRITE   NOTE(review): neither macro bounds the target address
 *                    against the end of the buffer being copied.
 */
#ifdef CONFIG_ARC_HAS_LL64
# define PREFETCH_READ(RX)	prefetch    [RX, 56]
# define PREFETCH_WRITE(RX)	prefetchw   [RX, 64]
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define PREFETCH_READ(RX)	prefetch    [RX, 28]
# define PREFETCH_WRITE(RX)	prefetchw   [RX, 32]
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:       r0 = dst, r1 = src, r2 = n (byte count)
 * Out:      r0 = dst. r0 is never written; r3 is the moving store pointer.
 * Clobbers: r1-r10, lp_count and the status flags. When LOADX/STOREX expand
 *           to ldd/std (CONFIG_ARC_HAS_LL64) they move 64-bit register
 *           pairs, so r11 is written as well.
 *
 * Flow:
 *   - n == 0 returns at once; n <= 8 is copied byte by byte.
 *   - Otherwise bytes are copied until dst is 32-bit aligned, then one of
 *     four paths is taken according to (src & 3):
 *       CASE 0      src aligned: bulk loop of four LOADX/STOREX pairs.
 *       CASE 1/2/3  src off by 1/2/3: word loads from the aligned source are
 *                   shifted and OR-ed with a carry register (r5) so that
 *                   every store to dst is an aligned word store.
 *   - Each path finishes with a bytewise loop for the leftover bytes.
 *
 * All loops are zero-overhead loops: lpnz is skipped when Z is set, which is
 * why every loop count is produced by a flag-setting (.f) instruction.
 *
 * No prefetch or prefetchw is issued. prefetchw takes write ownership of a
 * cache line, and a look-ahead from the store pointer can land on a line
 * past the end of dst. That is unsafe when the neighbouring line belongs to
 * a buffer used for DMA. The read prefetches sat directly in front of the
 * loads they were meant to hide, so they are dropped as well.
 */
ENTRY_CFI(memcpy)
	mov.f	0, r2			; flags only: Z set when n is zero
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; delay slot: do not clobber ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2		; delay slot: n is non-zero here, so Z is clear

;;; Copy 4 - (dst & 3) bytes so that the store pointer becomes 32bit aligned.
;;; The loop is skipped when dst is already aligned (Z set by and.f).
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to a count of 4-transfer blocks, unfold x4.
;;; The lsr.f below sits in the delay slot of the branch above, so it also
;;; runs on the unaligned paths, which recompute lp_count and the flags.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; bytes left after the block loop
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	;; r4 = src & 3, one of 1, 2 or 3
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1		; delay slot: runs on all three paths, keeps flags

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]		; delay slot: first carry byte for CASE 1 and 3

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2		; 3 bytes are now held in the carry
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6		; r5 = carry: 3 bytes loaded, not yet stored

	;; Both src and dst are aligned
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]		; carry = 2 bytes, src is now 32bit aligned
	sub	r2, r2, 1		; with the earlier decrement: 2 bytes in the carry

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only reposition the carry when the word loop will run (NZ)
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Flags still come from the lsr.f above: nothing in the loop sets them
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]		; flush the 2 carried bytes

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
;;; (that byte was already loaded into r5 in the delay slot of the bhi.d)

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only reposition the carry when the word loop will run
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Flags still come from the lsr.f above: nothing in the loop sets them
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]		; flush the single carried byte

	and.f	lp_count, r2, 0x07 ;Last 8bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END_CFI(memcpy)