/* $Id: memscan.S,v 1.3 2000/01/31 04:59:10 davem Exp $
 * memscan.S: Optimized memscan for Sparc64.
 *
 * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
 * Copyright (C) 1998 David S. Miller (davem@redhat.com)
 */

/*
 * Per-byte masks for the eight-bytes-at-a-time zero scan in __memscan_zero.
 * HI_MAGIC has bit 7 of every byte set, LO_MAGIC has bit 0 of every byte set.
 * LO_MAGIC is not referenced by name in this file section: the scan code
 * derives the same value at run time as HI_MAGIC >> 7.
 */
#define HI_MAGIC	0x8080808080808080
#define LO_MAGIC	0x0101010101010101

/* ASI number used by the ldxa loads (0x88 is SPARC V9 ASI_PRIMARY_LITTLE). */
#define ASI_PL		0x88

	.text
	.align	32
	.globl		__memscan_zero, __memscan_generic
	.globl		memscan

/*
 * __memscan_zero: find the first zero byte in a buffer.
 *
 * In:   %o0 = bufp, %o1 = size
 * Out:  %o0 = address of the first zero byte, or bufp + size when no zero
 *       byte lies inside the buffer.  When size <= 0, bufp is returned
 *       unchanged.
 * Clobbers: %o1-%o5, %g2, %g3, %g7 and the integer condition codes.
 *       Leaf routine: returns with retl, uses no stack and no new window.
 *
 * Method: bytes are tested one at a time until %o0 is 8-byte aligned.
 * After that, one aligned 8-byte word is loaded per iteration through
 * ASI_PL, so that the byte at the lowest address sits in bits 7:0, and
 * ((x - LO) ^ x) & HI is evaluated on it.  That value is non-zero whenever
 * the word holds a zero byte, but it can also be non-zero without one, so
 * check_bytes verifies the individual bytes and re-enters the loop on a
 * false alarm.
 *
 * Note: the word loop always loads a whole aligned 8-byte word, even when
 * fewer than 8 bytes remain, so it can read past bufp + size (never past
 * the end of that aligned word).  The result is clamped to bufp + size.
 *
 * Instructions indented by one extra space are branch delay slots.
 */
__memscan_zero:
	/* %o0 = bufp, %o1 = size */
	brlez,pn	%o1, szzero		/* size <= 0: nothing to scan */
	 andcc		%o0, 7, %g0		/* is bufp 8-byte aligned? */
	be,pt		%icc, we_are_aligned
	 sethi		%hi(HI_MAGIC), %o4	/* executed on both paths */
	ldub		[%o0], %o5

	/* Unaligned head: one byte per pass until %o0 is 8-byte aligned. */
1:	subcc		%o1, 1, %o1		/* size--; flags feed the be below */
	brz,pn		%o5, 10f		/* this byte is zero */
	 add		%o0, 1, %o0		/* always advances; 10: undoes it */

	be,pn		%xcc, szzero		/* size reached 0: %o0 is the end */
	 andcc		%o0, 7, %g0
	bne,a,pn	%icc, 1b		/* still unaligned: take next byte */
	 ldub		[%o0], %o5		/* annulled unless branch is taken */

we_are_aligned:
	ldxa		[%o0] ASI_PL, %o5	/* first aligned 8-byte word */
	or		%o4, %lo(HI_MAGIC), %o3	/* finish the sethi/or pair ... */
	sllx		%o3, 32, %o4
	or		%o4, %o3, %o3		/* ... and copy it into the top half */

	srlx		%o3, 7, %o2		/* %o2 = %o3 >> 7: bit 0 of each byte */

	/*
	 * Word loop.  %o1 and %o0 are adjusted before the test, so from here
	 * on %o0 points just past the word held in %o5 and %o1 is the count
	 * left after that word (it may go negative).
	 */
msloop:
	sub		%o1, 8, %o1
	add		%o0, 8, %o0
	sub		%o5, %o2, %o4
	xor		%o4, %o5, %o4		/* (x - LO) ^ x */
	andcc		%o4, %o3, %g3		/* & HI: non-zero => candidate */
	bne,pn		%xcc, check_bytes
	 srlx		%o4, 32, %g3		/* upper half of the xor result */

	brgz,a,pt	%o1, msloop		/* bytes left: next word */
	 ldxa		[%o0] ASI_PL, %o5
	/* Size exhausted without a candidate: fall through with Z set. */

	/*
	 * Verify the word byte by byte.  %g2 tracks the address of the byte
	 * under test; each "inc %g2" sits in the delay slot of the branch
	 * that tests the following byte, so it runs before control reaches 1:.
	 */
check_bytes:
	bne,a,pn	%icc, 2f		/* candidate in the low 4 bytes */
	 andcc		%o5, 0xff, %g0		/* test byte 0 (only if taken) */
	add		%o0, -5, %g2		/* low half clean: start at byte 4 */
	ba,pt		%xcc, 3f
	 srlx		%o5, 32, %g7		/* %g7 = bytes 4..7 */

2:	srlx		%o5, 8, %g7
	be,pn		%icc, 1f		/* byte 0 is zero */
	 add		%o0, -8, %g2		/* %g2 = address of byte 0 */
	andcc		%g7, 0xff, %g0		/* byte 1 */
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g7, 0xff, %g0		/* byte 2 */

	srlx		%g7, 8, %g7
	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g7, 0xff, %g0		/* byte 3 */
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g3, %o3, %g0		/* any candidate in bytes 4..7? */

	be,a,pn		%icc, 2f		/* no: false alarm, resume loop */
	 mov		%o0, %g2
3:	andcc		%g7, 0xff, %g0		/* byte 4 */
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g7, 0xff, %g0		/* byte 5 */
	srlx		%g7, 8, %g7

	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g7, 0xff, %g0		/* byte 6 */
	srlx		%g7, 8, %g7
	be,pn		%icc, 1f
	 inc		%g2
	andcc		%g7, 0xff, %g0		/* byte 7 */
	srlx		%g7, 8, %g7

	be,pn		%icc, 1f
	 inc		%g2
2:	brgz,a,pt	%o1, msloop		/* no zero byte here: keep going */
	 ldxa		[%o0] ASI_PL, %o5
	inc		%g2			/* out of data: push %g2 past the end */

	/* Return min(%g2, bufp + size); the comparison is signed 64-bit. */
1:	add		%o0, %o1, %o0		/* %o0 = end of the buffer */
	cmp		%g2, %o0
	retl
	 movle		%xcc, %g2, %o0

	/* Zero byte found in the unaligned head: step back onto it. */
10:	retl
	 sub		%o0, 1, %o0

	/* Return %o0 as it stands. */
szzero:	retl
	 nop

/*
 * memscan / __memscan_generic: find the first byte equal to c.
 *
 * In:   %o0 = addr, %o1 = c, %o2 = size
 * Out:  %o0 = address of the first byte equal to (c & 0xff), or
 *       addr + size when there is none.  When size == 0, addr is returned
 *       unchanged.
 * Clobbers: %o1, %o3, %o4, %o5 and the integer condition codes.
 *       Leaf routine: returns with retl, uses no stack and no new window.
 *
 * c is reduced to its low 8 bits before the scan.  ldub zero-extends each
 * loaded byte, so without the mask a sign-extended char argument (for
 * example 0xffffffff for byte 0xff) could never compare equal.
 *
 * The loop indexes from the end of the buffer: %o3 = addr + size and %o4
 * runs from -size up to 0, so reaching zero doubles as the exit test.
 *
 * Instructions indented by one extra space are branch delay slots.
 */
memscan:
__memscan_generic:
	/* %o0 = addr, %o1 = c, %o2 = size */
	brz,pn		%o2, 3f			/* size == 0: return addr */
	 add		%o0, %o2, %o3		/* %o3 = addr + size */
	and		%o1, 0xff, %o1		/* compare against a byte value */
	ldub		[%o0], %o5
	sub		%g0, %o2, %o4		/* %o4 = -size */
1:
	cmp		%o5, %o1
	be,pn		%icc, 2f		/* match */
	 addcc		%o4, 1, %o4		/* always runs; sets flags for bne */
	bne,a,pt	%xcc, 1b		/* index != 0: more bytes to test */
	 ldub		[%o3 + %o4], %o5	/* annulled unless branch is taken */
	retl					/* no match: return addr + size */
	/* The delay slot is the same as the next insn, this is just to make it look more awful */
2:
	 add		%o3, %o4, %o0		/* retl delay slot and match entry */
	retl
	 sub		%o0, 1, %o0		/* undo the increment from addcc */
3:
	retl
	 nop