blob: 8953d2382a653a948afb340304b07bbfc51181d8 [file] [log] [blame]
Anton Blanchard15c2d452015-01-21 12:27:38 +11001/*
2 * Author: Anton Blanchard <anton@au.ibm.com>
3 * Copyright 2015 IBM Corporation.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10#include <asm/ppc_asm.h>
11
/* Index registers holding the byte offsets used by the indexed loads in the long path */
12#define off8 r6
13#define off16 r7
14#define off24 r8
15
/*
 * Data registers. rA/rB hold the bytes in the short loop (rC their
 * difference); in the long path the pairs rA/rB, rC/rD, rE/rF and rG/rH
 * hold matching doublewords from the first and second buffer.
 * rD..rH map to r27..r31, which the long path saves and restores.
 */
16#define rA r9
17#define rB r10
18#define rC r11
19#define rD r27
20#define rE r28
21#define rF r29
22#define rG r30
23#define rH r31
24
/*
 * LD is the indexed doubleword load used by the long path. Little-endian
 * builds use the byte-reversed form so that, in both cases, the
 * lowest-addressed byte is the most significant one in the register and an
 * unsigned doubleword compare orders the data like a byte-by-byte compare.
 */
25#ifdef __LITTLE_ENDIAN__
26#define LD ldbrx
27#else
28#define LD ldx
29#endif
30
/*
 * memcmp: compare two byte buffers.
 *
 * In:  r3 = first buffer, r4 = second buffer, r5 = length in bytes
 *      (presumably the C prototype memcmp(s1, s2, n) -- confirm against callers).
 * Out: r3 = 0 if the buffers are equal over the whole length.
 *      Short (byte) path: r3 = (byte from r3 buffer) - (byte from r4 buffer)
 *      at the first mismatch.
 *      Long (doubleword) path: r3 = 1 or -1 according to the unsigned
 *      comparison of the first mismatching doubleword pair.
 *
 * The long path is taken only when both pointers are 8-byte aligned and the
 * length is greater than 31; it handles 32 bytes per iteration and hands any
 * remainder (length mod 32) to the byte loop.
 *
 * Uses r0, r6-r11, CTR and cr0/cr1/cr6/cr7. The long path also uses r27-r31,
 * which it saves below r1 and restores before returning.
 */
31_GLOBAL(memcmp)
32 cmpdi cr1,r5,0 /* cr1: is the length zero? */
33
34 /* Use the short loop unless both strings are 8B aligned */
35 or r6,r3,r4
36 andi. r6,r6,7 /* cr0 is "equal" only if neither pointer has any of its low 3 bits set */
37
38 /* Use the short loop if length is less than 32B */
39 cmpdi cr6,r5,31
40
41 beq cr1,.Lzero /* zero length: return 0 */
42 bne .Lshort /* cr0 from the andi. above: at least one pointer is unaligned */
43 bgt cr6,.Llong /* aligned and more than 31 bytes; otherwise fall through */
44
/*
 * Byte-at-a-time loop, unrolled four times. CTR counts the remaining bytes.
 * Also entered from .Ltail with r5 = leftover byte count and r3/r4 already
 * advanced past the doubleword-compared data.
 */
45.Lshort:
46 mtctr r5
47
481: lbz rA,0(r3)
49 lbz rB,0(r4)
50 subf. rC,rB,rA /* rC = rA - rB, sets cr0 */
51 bne .Lnon_zero
52 bdz .Lzero /* decrement CTR; done with no mismatch if it hits zero */
53
54 lbz rA,1(r3)
55 lbz rB,1(r4)
56 subf. rC,rB,rA
57 bne .Lnon_zero
58 bdz .Lzero
59
60 lbz rA,2(r3)
61 lbz rB,2(r4)
62 subf. rC,rB,rA
63 bne .Lnon_zero
64 bdz .Lzero
65
66 lbz rA,3(r3)
67 lbz rB,3(r4)
68 subf. rC,rB,rA
69 bne .Lnon_zero
70
71 addi r3,r3,4
72 addi r4,r4,4
73
74 bdnz 1b /* CTR decrement for the fourth byte; loop while bytes remain, else fall into .Lzero */
75
/* Buffers compared equal: return 0 */
76.Lzero:
77 li r3,0
78 blr
79
/* Byte mismatch: return the difference computed by the last subf. */
80.Lnon_zero:
81 mr r3,rC
82 blr
83
/*
 * Doubleword path: both pointers 8B aligned, length > 31.
 * Each iteration covers 32 bytes as four doubleword pairs
 * (A/B at +0, C/D at +8, E/F at +16, G/H at +24).
 */
84.Llong:
85 li off8,8
86 li off16,16
87 li off24,24
88
/*
 * Save r27-r31 below the stack pointer without moving r1.
 * NOTE(review): this relies on the area below r1 being safe to use
 * under the ABI in effect -- confirm.
 */
89 std r31,-8(r1)
90 std r30,-16(r1)
91 std r29,-24(r1)
92 std r28,-32(r1)
93 std r27,-40(r1)
94
95 srdi r0,r5,5 /* r0 = number of whole 32-byte blocks */
96 mtctr r0
97 andi. r5,r5,31 /* r5 = leftover bytes for .Ltail; the cr0 result is overwritten by the cmpld below before any branch tests it */
98
/* Load the whole first 32-byte block */
99 LD rA,0,r3
100 LD rB,0,r4
101
102 LD rC,off8,r3
103 LD rD,off8,r4
104
105 LD rE,off16,r3
106 LD rF,off16,r4
107
108 LD rG,off24,r3
109 LD rH,off24,r4
110 cmpld cr0,rA,rB /* cr0 = A/B result of the first block */
111
112 addi r3,r3,32
113 addi r4,r4,32
114
115 bdz .Lfirst32 /* only one block: finish its compares without loading more */
116
/*
 * Second block: start loading it while the remaining compares of the
 * first block are issued and the first results are tested.
 */
117 LD rA,0,r3
118 LD rB,0,r4
119 cmpld cr1,rC,rD
120
121 LD rC,off8,r3
122 LD rD,off8,r4
123 cmpld cr6,rE,rF
124
125 LD rE,off16,r3
126 LD rF,off16,r4
127 cmpld cr7,rG,rH
128 bne cr0,.LcmpAB
129
130 LD rG,off24,r3
131 LD rH,off24,r4
132 cmpld cr0,rA,rB /* A/B of the second block */
133 bne cr1,.LcmpCD
134
135 addi r3,r3,32
136 addi r4,r4,32
137
138 bdz .Lsecond32 /* no further blocks: drain the pending compares */
139
140 .balign 16
141
/*
 * Steady-state loop. On entry rC..rH hold the most recently loaded block,
 * cr0 holds its A/B result, and cr6/cr7 hold the not-yet-tested E/F and G/H
 * results of the block before it. Each pass loads the next block while
 * comparing the current one and testing results in address order.
 */
1421: LD rA,0,r3
143 LD rB,0,r4
144 cmpld cr1,rC,rD
145 bne cr6,.LcmpEF /* E/F of the previous block */
146
147 LD rC,off8,r3
148 LD rD,off8,r4
149 cmpld cr6,rE,rF
150 bne cr7,.LcmpGH /* G/H of the previous block */
151
152 LD rE,off16,r3
153 LD rF,off16,r4
154 cmpld cr7,rG,rH
155 bne cr0,.LcmpAB /* A/B of the current block */
156
157 LD rG,off24,r3
158 LD rH,off24,r4
159 cmpld cr0,rA,rB /* A/B of the block just loaded */
160 bne cr1,.LcmpCD /* C/D of the current block */
161
162 addi r3,r3,32
163 addi r4,r4,32
164
165 bdnz 1b
166
/*
 * Drain: no more loads. Test the pending E/F and G/H results of the previous
 * block, then compare and test everything in the last loaded block.
 */
167.Lsecond32:
168 cmpld cr1,rC,rD
169 bne cr6,.LcmpEF
170
171 cmpld cr6,rE,rF
172 bne cr7,.LcmpGH
173
174 cmpld cr7,rG,rH
175 bne cr0,.LcmpAB
176
177 bne cr1,.LcmpCD
178 bne cr6,.LcmpEF
179 bne cr7,.LcmpGH
180
/*
 * All whole blocks matched: restore r27-r31, then either return 0 or let
 * the byte loop compare the remaining r5 (< 32) bytes.
 */
181.Ltail:
182 ld r31,-8(r1)
183 ld r30,-16(r1)
184 ld r29,-24(r1)
185 ld r28,-32(r1)
186 ld r27,-40(r1)
187
188 cmpdi r5,0
189 beq .Lzero
190 b .Lshort
191
/* Exactly one 32-byte block: cr0 already holds A/B; compare and test the rest */
192.Lfirst32:
193 cmpld cr1,rC,rD
194 cmpld cr6,rE,rF
195 cmpld cr7,rG,rH
196
197 bne cr0,.LcmpAB
198 bne cr1,.LcmpCD
199 bne cr6,.LcmpEF
200 bne cr7,.LcmpGH
201
202 b .Ltail
203
/*
 * Mismatch handlers, one per CR field: return 1 if the doubleword from the
 * r3 buffer compared greater (unsigned), otherwise -1.
 */
203.LcmpAB:
205 li r3,1
206 bgt cr0,.Lout
207 li r3,-1
208 b .Lout
209
210.LcmpCD:
211 li r3,1
212 bgt cr1,.Lout
213 li r3,-1
214 b .Lout
215
216.LcmpEF:
217 li r3,1
218 bgt cr6,.Lout
219 li r3,-1
220 b .Lout
221
222.LcmpGH:
223 li r3,1
224 bgt cr7,.Lout
225 li r3,-1
226
/* Common exit for the long path: restore r27-r31 and return r3 */
227.Lout:
228 ld r31,-8(r1)
229 ld r30,-16(r1)
230 ld r29,-24(r1)
231 ld r28,-32(r1)
232 ld r27,-40(r1)
233 blr