/*
 *  linux/arch/alpha/lib/memcpy.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
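/*
 * Note that these macros contain a bare "return": they bail straight out
 * of the (void) copy routine that invokes them once n is exhausted.
 */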
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
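		/*
		 * Each round keeps the quadword covering the current source
		 * position in low_word and fetches the following quadword
		 * into high_word.  extql/extqh extract the two pieces
		 * according to the byte offset of s (e.g. with (s & 7) == 3,
		 * extql keeps the top five bytes of low_word at the bottom of
		 * the result and extqh supplies the low three bytes of
		 * high_word as the top), so OR-ing them yields the eight
		 * source bytes starting at s, stored with a single aligned
		 * write to d.  The freshly loaded quadword becomes the next
		 * round's low_word, so nothing is read twice.
		 */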
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but using a floating point register
 * for the move seems to slow things down (very small difference, though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
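	/* bias by 8 up front (as in __memcpy_unaligned_up) so the loop can just test n >= 0 */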
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
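/*
 * Same as above, but copying downward: both pointers are first moved to
 * the end of their buffers and then pre-decremented.
 */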
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

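/*
 * dest and src can share the aligned loop exactly when their addresses
 * agree in the low three bits, i.e. when (dest ^ src) & 7 == 0: aligning
 * the destination to 8 bytes then leaves the source aligned as well.
 * Only the ascending helpers are called here; memcpy() is not required
 * to handle overlapping buffers, and the "_dn" variants above are unused.
 */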
void * memcpy(void * dest, const void *src, size_t n)
{
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);