blob: a810fc6f21d5dcedbe874e16dc541db407fa5c30 [file] [log] [blame]
Paul Mundt1aad54a2008-11-18 17:33:48 +09001/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005, 2006
3 Free Software Foundation, Inc.
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 2, or (at your option) any
8later version.
9
10In addition to the permissions in the GNU General Public License, the
11Free Software Foundation gives you unlimited permission to link the
12compiled version of this file into combinations with other programs,
13and to distribute those combinations without any restriction coming
14from the use of this file. (The General Public License restrictions
15do apply in other respects; for example, they cover modification of
16the file, and distribution when not linked into a combine
17executable.)
18
19This file is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22General Public License for more details.
23
24You should have received a copy of the GNU General Public License
25along with this program; see the file COPYING. If not, write to
26the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA. */
28
29!! libgcc routines for the Renesas / SuperH SH CPUs.
30!! Contributed by Steve Chamberlain.
31!! sac@cygnus.com
32
33!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34!! recoded in assembly by Toshiyasu Morita
35!! tm@netcom.com
36
37/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38 ELF local label prefixes by J"orn Rennecke
39 amylaar@cygnus.com */
40
41/* This code uses shld, and is therefore not suitable for SH1 / SH2. */
42
43/* Signed / unsigned division without use of FPU, optimized for SH4.
44 Uses a lookup table for divisors in the range -128 .. +128, and
45 div1 with case distinction for larger divisors in three more ranges.
46 The code is lumped together with the table to allow the use of mova. */
/* Byte offsets of individual bytes inside a 32-bit word in memory, chosen
   according to the configured endianness:
     L_LSB    - bits 0..7   (least significant byte of the word),
     L_LSWMSB - bits 8..15  (most significant byte of the low 16-bit half),
     L_MSWLSB - bits 16..23 (least significant byte of the high 16-bit half).
   L_LSWMSB and L_MSWLSB are used below as mov.b displacements off r15;
   L_LSB is not referenced in the code visible in this file. */
47#ifdef CONFIG_CPU_LITTLE_ENDIAN
48#define L_LSB 0
49#define L_LSWMSB 1
50#define L_MSWLSB 2
51#else
52#define L_LSB 3
53#define L_LSWMSB 2
54#define L_MSWLSB 1
55#endif
56
/* Unsigned 32-bit division entry point (__udivsi3 is set as an alias).
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   r4 and r5 are pushed on the stack (r15) and popped again before the
   return; r1 is scratch.  This entry only classifies the divisor and
   starts the work, which is finished in one of:
     udiv_le128 - divisor <= 128 (table of inverses),
     udiv_ge64k - divisor does not fit in 16 bits,
     udiv_25    - everything in between (shared with the signed code). */
57 .balign 4
58 .global __udivsi3_i4i
59 .global __udivsi3
60 .set __udivsi3, __udivsi3_i4i
61 .type __udivsi3_i4i, @function
62__udivsi3_i4i:
63 mov.w c128_w, r1 /* r1 = 128 */
64 div0u /* initialise the division flags for unsigned div1 steps */
65 mov r4,r0 /* r0 = dividend */
66 shlr8 r0 /* r0 = dividend >> 8 */
67 cmp/hi r1,r5 /* T = (divisor > 128), unsigned compare */
68 extu.w r5,r1 /* r1 = low 16 bits of the divisor */
69 bf udiv_le128 /* divisor <= 128: use the lookup tables */
70 cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */
71 bf udiv_ge64k /* divisor >= 0x10000 */
/* Here 128 < divisor < 0x10000.  Compared with the signed path at
   pos_result, one more bit is shifted out of r0 and one more div1 is
   issued before joining udiv_25. */
72 shlr r0 /* r0 = dividend >> 9 */
73 mov r5,r1 /* keep the unshifted divisor so it can be saved */
74 shll16 r5 /* move the divisor into the upper 16 bits for div1 */
75 mov.l r4,@-r15 /* push dividend */
76 div1 r5,r0
77 mov.l r1,@-r15 /* push original divisor */
78 div1 r5,r0
79 div1 r5,r0
80 bra udiv_25 /* continue in the shared div1 sequence */
81 div1 r5,r0 /* (branch delay slot) */
82
/* Division by a divisor of at most 128 using the lookup tables: the
   dividend is multiplied by a normalized inverse of the divisor and the
   product is shifted into place.
     div_le128  - entered from pos_result in the signed code; r4 and r5
                  have already been pushed there.
     udiv_le128 - entered from __udivsi3_i4i; pushes r4 and r5 itself.
   Both arrive at div_le128_2 with r1 = div_table_ix[divisor].
   For a divisor of 0 that index is -6, so the mov.l at div_le128_2 is a
   misaligned load (see the comment above div_table_ix). */
83div_le128:
84 mova div_table_ix,r0 /* r0 = address of div_table_ix */
85 bra div_le128_2
86 mov.b @(r0,r5),r1 /* (delay slot) r1 = div_table_ix[divisor], sign-extended */
87udiv_le128:
88 mov.l r4,@-r15 /* push dividend */
89 mova div_table_ix,r0 /* r0 = address of div_table_ix */
90 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], sign-extended */
91 mov.l r5,@-r15 /* push divisor */
92div_le128_2:
93 mova div_table_inv,r0
94 mov.l @(r0,r1),r1 /* r1 = inverse fraction stored at div_table_inv + r1 */
95 mov r5,r0
96 tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */
97 mova div_table_clz,r0
98 dmulu.l r1,r4 /* mach:macl = inverse fraction * dividend (unsigned 32x32) */
99 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */
100 bt/s div_by_1 /* divisor 1 (or 0, if the load above did not trap) */
101 mov r4,r0 /* (delay slot) r0 = dividend, the result for divisor 1 */
102 mov.l @r15+,r5 /* restore divisor */
103 sts mach,r0 /* r0 = high 32 bits of the product */
104 /* clrt */
/* No clrt is needed: T is already 0 because the bt/s above fell through
   and nothing since the tst has modified T. */
105 addc r4,r0 /* add the dividend for the implicit leading 1 of the inverse; carry -> T */
106 mov.l @r15+,r4 /* restore dividend */
107 rotcr r0 /* shift the 33-bit sum T:r0 right by one */
108 rts
109 shld r1,r0 /* (delay slot) apply the table shift; a negative count shifts right */
110
/* Exit for the table paths when the divisor tested as 0 or 1.
     div_by_1_neg - result must be negated: r0 = -r4.
     div_by_1     - r0 was already set to the dividend in the delay slot of
                    the branch that got here.
   Both pop the saved r5 and r4 and return. */
111div_by_1_neg:
112 neg r4,r0 /* r0 = -r4 */
113div_by_1:
114 mov.l @r15+,r5 /* restore divisor */
115 rts
116 mov.l @r15+,r4 /* (delay slot) restore dividend */
117
/* Division by a divisor that does not fit in 16 bits (>= 0x10000).
   On entry r0 = dividend >> 8.
     div_ge64k  - from pos_result in the signed code; T was set in the
                  delay slot there to (divisor > dividend >> 8); r4/r5 are
                  already on the stack.
     udiv_ge64k - from __udivsi3_i4i; does the same comparison and pushes
                  r4/r5 itself.
   If divisor > (dividend >> 8) the quotient is below 256 and the short
   div_r8 / udiv_r8 sequence is used instead.  Otherwise the divisor is
   shifted left by 8, eight div1 steps are run, the low byte of r0 is
   stored into a zeroed scratch word on the stack, the low byte of the
   dividend is merged into r0 and the common tail div_ge64k_end runs. */
118div_ge64k:
119 bt/s div_r8 /* divisor > dividend >> 8: short path */
120 div0u /* (delay slot) initialise flags for unsigned div1 steps */
121 shll8 r5 /* divisor << 8 */
122 bra div_ge64k_2
123 div1 r5,r0 /* (delay slot) first div1 step */
124udiv_ge64k:
125 cmp/hi r0,r5 /* T = (divisor > dividend >> 8), unsigned */
126 mov r5,r1 /* keep the unshifted divisor so it can be saved */
127 bt udiv_r8 /* short path */
128 shll8 r5 /* divisor << 8 */
129 mov.l r4,@-r15 /* push dividend */
130 div1 r5,r0 /* first div1 step */
131 mov.l r1,@-r15 /* push original divisor */
132div_ge64k_2:
133 div1 r5,r0
134 mov.l zero_l,r1 /* r1 = 0 */
135 .rept 4
136 div1 r5,r0
137 .endr
138 mov.l r1,@-r15 /* push a zeroed scratch word */
139 div1 r5,r0
140 mov.w m256_w,r1 /* r1 = 0xff00 sign-extended = 0xffffff00 */
141 div1 r5,r0
142 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* The xor/and/xor below compute r0 = (r0 & 0xffffff00) | (r4 & 0xff). */
143 xor r4,r0
144 and r1,r0
145 bra div_ge64k_end
146 xor r4,r0 /* (delay slot) */
147
/* Short division used when divisor > (dividend >> 8), i.e. the quotient
   is below 256.  On entry r0 = dividend >> 8.
     div_r8  - from div_ge64k; r4/r5 are already on the stack and T is 0
               after the div0u executed in the delay slot of the branch.
     udiv_r8 - from udiv_ge64k; pushes r4/r5 and clears T, which is still
               set from the taken bt.
   r4 is shifted so that the low byte of the dividend sits in bits 24..31.
   Each rotcl moves the next bit out of the top of r4/r0 into T, where the
   following div1 consumes it, and moves the previous T into bit 0.
   Eight div1 steps are executed on r1; the result is left in r0. */
148div_r8:
149 shll16 r4
150 bra div_r8_2
151 shll8 r4 /* (delay slot) r4 = dividend << 24 */
152udiv_r8:
153 mov.l r4,@-r15 /* push dividend */
154 shll16 r4
155 clrt /* T = 0 so that the first rotcl shifts in a zero */
156 shll8 r4 /* r4 = dividend << 24 */
157 mov.l r5,@-r15 /* push divisor */
158div_r8_2:
159 rotcl r4 /* top bit of r4 -> T */
160 mov r0,r1 /* r1 = dividend >> 8; div1 works on r1 from here on */
161 div1 r5,r1
162 mov r4,r0 /* r0 takes over the remaining low bits */
163 rotcl r0
164 mov r5,r4 /* copy the divisor so r5 can be restored before the last div1 */
165 div1 r5,r1
166 .rept 5
167 rotcl r0; div1 r5,r1
168 .endr
169 rotcl r0
170 mov.l @r15+,r5 /* restore divisor */
171 div1 r4,r1 /* last step uses the divisor copy in r4 */
172 mov.l @r15+,r4 /* restore dividend */
173 rts
174 rotcl r0 /* (delay slot) shift the final T bit into the result */
175
/* Signed 32-bit division entry point (__sdivsi3 is set as an alias).
   In:  r4 = dividend, r5 = divisor.   Out: r0 = quotient.
   The original r4 and r5 are pushed and restored on exit.  The operands
   are made non-negative, then the code continues at pos_result when the
   operand signs agree or at neg_result when they differ.  The divisor
   magnitude selects the method, as in the unsigned entry:
     <= 128             -> div_le128 (table of inverses),
     >= 0x10000         -> div_ge64k,
     otherwise          -> the div1 sequence below, joined at udiv_25 by
                           the unsigned entry. */
176 .global __sdivsi3_i4i
177 .global __sdivsi3
178 .set __sdivsi3, __sdivsi3_i4i
179 .type __sdivsi3_i4i, @function
180 /* This is link-compatible with a __sdivsi3 call,
181 but we effectively clobber only r1. */
182__sdivsi3_i4i:
183 mov.l r4,@-r15 /* push dividend */
184 cmp/pz r5 /* T = (divisor >= 0) */
185 mov.w c128_w, r1 /* r1 = 128 */
186 bt/s pos_divisor
187 cmp/pz r4 /* (delay slot) T = (dividend >= 0) */
/* Divisor is negative. */
188 mov.l r5,@-r15 /* push divisor */
189 neg r5,r5 /* r5 = -divisor */
190 bt/s neg_result /* dividend >= 0: operand signs differ */
191 cmp/hi r1,r5 /* (delay slot) T = (r5 > 128), unsigned */
192 neg r4,r4 /* both operands negative: r4 = -dividend */
193pos_result:
194 extu.w r5,r0 /* r0 = low 16 bits of r5 */
195 bf div_le128 /* r5 <= 128: use the lookup tables */
196 cmp/eq r5,r0 /* T = (r5 fits in 16 bits) */
197 mov r4,r0
198 shlr8 r0 /* r0 = r4 >> 8 */
199 bf/s div_ge64k /* r5 >= 0x10000 */
200 cmp/hi r0,r5 /* (delay slot) T = (r5 > r4 >> 8), tested at div_ge64k */
201 div0u /* initialise flags for unsigned div1 steps */
202 shll16 r5 /* move the divisor into the upper 16 bits for div1 */
203 div1 r5,r0
204 div1 r5,r0
205 div1 r5,r0
/* udiv_25: the unsigned entry joins here with r4/r5 saved, r5 shifted left
   by 16 and its first div1 steps already done. */
206udiv_25:
207 mov.l zero_l,r1 /* r1 = 0 */
208 div1 r5,r0
209 div1 r5,r0
210 mov.l r1,@-r15 /* push a zeroed scratch word */
211 .rept 3
212 div1 r5,r0
213 .endr
214 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
/* xtrct + swap.w together replace the low 16 bits of r0 with the low 16
   bits of r4, keeping the upper 16 bits of r0. */
215 xtrct r4,r0
216 swap.w r0,r0
217 .rept 8
218 div1 r5,r0
219 .endr
220 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* div_ge64k_end: common tail, also reached from div_ge64k_2.  Runs the
   last eight div1 steps, merges the low byte of r0 with the bytes stored
   in the scratch word, restores r5/r4 and shifts in the final T bit. */
221div_ge64k_end:
222 .rept 8
223 div1 r5,r0
224 .endr
225 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
226 extu.b r0,r0 /* keep only the low byte of r0 */
227 mov.l @r15+,r5 /* restore divisor */
228 or r4,r0 /* r4 holds the popped scratch word here */
229 mov.l @r15+,r4 /* restore dividend */
230 rts
231 rotcl r0 /* (delay slot) shift the final T bit into the result */
232
/* Table-driven division for the signed case with a negative result and a
   divisor magnitude of at most 128.  Entered from neg_result with
   r0 = low 16 bits of r5 (the divisor magnitude) and r4/r5 already saved
   on the stack.  Same computation as div_le128_2, followed by a negation.
   The mov.l in the bt/s delay slot is executed whether or not the branch
   is taken, so an index of -6 (divisor 0) gives a misaligned load here
   too. */
233div_le128_neg:
234 tst #0xfe,r0 /* T = 1 when the divisor magnitude is 0 or 1 */
235 mova div_table_ix,r0
236 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], sign-extended */
237 mova div_table_inv,r0
238 bt/s div_by_1_neg /* divisor magnitude 0 or 1: result is -r4 */
239 mov.l @(r0,r1),r1 /* (delay slot) r1 = inverse fraction at div_table_inv + r1 */
240 mova div_table_clz,r0
241 dmulu.l r1,r4 /* mach:macl = inverse fraction * r4 (unsigned 32x32) */
242 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */
243 mov.l @r15+,r5 /* restore divisor */
244 sts mach,r0 /* r0 = high 32 bits of the product */
245 /* clrt */
/* No clrt is needed: T is already 0 because the bt/s above fell through. */
246 addc r4,r0 /* add r4 for the implicit leading 1 of the inverse; carry -> T */
247 mov.l @r15+,r4 /* restore dividend */
248 rotcr r0 /* shift the 33-bit sum T:r0 right by one */
249 shld r1,r0 /* apply the table shift; a negative count shifts right */
250 rts
251 neg r0,r0 /* (delay slot) negate the quotient */
252
/* pos_divisor: continuation of __sdivsi3_i4i for a non-negative divisor.
   T still holds (dividend >= 0) from the delay slot of the branch that
   got here.  A non-negative dividend goes to pos_result; otherwise the
   dividend is negated and the negative-result code follows.
   neg_result: div1 sequence for a result that must be negated.  It mirrors
   pos_result / udiv_25 / div_ge64k_end, but collects the quotient in r1 so
   that the final neg can write r0.
   div_ge64k_neg_end and div_r8_neg_end are the tails shared with
   div_ge64k_neg and div_r8_neg. */
253pos_divisor:
254 mov.l r5,@-r15 /* push divisor */
255 bt/s pos_result /* dividend >= 0: operand signs agree */
256 cmp/hi r1,r5 /* (delay slot) T = (divisor > 128), unsigned */
257 neg r4,r4 /* r4 = -dividend */
258neg_result:
259 extu.w r5,r0 /* r0 = low 16 bits of r5 */
260 bf div_le128_neg /* r5 <= 128: use the lookup tables */
261 cmp/eq r5,r0 /* T = (r5 fits in 16 bits) */
262 mov r4,r0
263 shlr8 r0 /* r0 = r4 >> 8 */
264 bf/s div_ge64k_neg /* r5 >= 0x10000 */
265 cmp/hi r0,r5 /* (delay slot) T = (r5 > r4 >> 8), tested at div_ge64k_neg */
266 div0u /* initialise flags for unsigned div1 steps */
267 mov.l zero_l,r1 /* r1 = 0 */
268 shll16 r5 /* move the divisor into the upper 16 bits for div1 */
269 div1 r5,r0
270 mov.l r1,@-r15 /* push a zeroed scratch word */
271 .rept 7
272 div1 r5,r0
273 .endr
274 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 -> bits 16..23 of the scratch word */
/* xtrct + swap.w together replace the low 16 bits of r0 with the low 16
   bits of r4, keeping the upper 16 bits of r0. */
275 xtrct r4,r0
276 swap.w r0,r0
277 .rept 8
278 div1 r5,r0
279 .endr
280 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
281div_ge64k_neg_end:
282 .rept 8
283 div1 r5,r0
284 .endr
285 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
286 extu.b r0,r1 /* r1 = low byte of r0 */
287 mov.l @r15+,r5 /* restore divisor */
288 or r4,r1 /* r4 holds the popped scratch word here */
289div_r8_neg_end:
290 mov.l @r15+,r4 /* restore dividend */
291 rotcl r1 /* shift the final T bit into the quotient */
292 rts
293 neg r1,r0 /* (delay slot) r0 = -quotient */
294
/* Negative-result counterpart of div_ge64k: divisor magnitude >= 0x10000.
   Entered from neg_result with r0 = r4 >> 8, r4/r5 saved on the stack and
   T = (r5 > r4 >> 8) from the delay slot of the branch.  When T is set the
   quotient is below 256 and div_r8_neg is used.  Otherwise: divisor << 8,
   eight div1 steps, the low byte of r0 is stored into a zeroed scratch
   word, the low byte of r4 is merged into r0, and the tail at
   div_ge64k_neg_end finishes the job. */
295div_ge64k_neg:
296 bt/s div_r8_neg /* r5 > r4 >> 8: short path */
297 div0u /* (delay slot) initialise flags for unsigned div1 steps */
298 shll8 r5 /* divisor << 8 */
299 mov.l zero_l,r1 /* r1 = 0 */
300 .rept 6
301 div1 r5,r0
302 .endr
303 mov.l r1,@-r15 /* push a zeroed scratch word */
304 div1 r5,r0
305 mov.w m256_w,r1 /* r1 = 0xff00 sign-extended = 0xffffff00 */
306 div1 r5,r0
307 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 -> bits 8..15 of the scratch word */
/* The xor/and/xor below compute r0 = (r0 & 0xffffff00) | (r4 & 0xff). */
308 xor r4,r0
309 and r1,r0
310 bra div_ge64k_neg_end
311 xor r4,r0 /* (delay slot) */
312
/* 16-bit constant 128, loaded with mov.w by both entry points and compared
   against the divisor to select the lookup-table path.  It sits between
   code sections, presumably to stay within reach of the PC-relative mov.w
   (not verifiable from this file alone). */
313c128_w:
314 .word 128
315
/* Negative-result counterpart of div_r8: the quotient is below 256.
   Entered from div_ge64k_neg with r0 = r4 >> 8 and r4/r5 saved on the
   stack.  r1 = r4 << 24 supplies the low byte of the dividend: each rotcl
   moves its top bit into T for the following div1 and moves the previous
   T into bit 0 of r1.  Eight div1 steps are executed on r0; the quotient
   accumulates in r1 and is finished at div_r8_neg_end. */
316div_r8_neg:
317 clrt /* T = 0 so that the first rotcl shifts in a zero */
318 shll16 r4
319 mov r4,r1
320 shll8 r1 /* r1 = original r4 << 24 */
321 mov r5,r4 /* copy the divisor so r5 can be restored before the last div1 */
322 .rept 7
323 rotcl r1; div1 r5,r0
324 .endr
325 mov.l @r15+,r5 /* restore divisor */
326 rotcl r1
327 bra div_r8_neg_end
328 div1 r4,r0 /* (delay slot) last step uses the divisor copy in r4 */
329
/* 16-bit constant 0xff00.  It is loaded with mov.w, which sign-extends it
   to 0xffffff00; the code uses it as a mask that clears the low byte. */
330m256_w:
331 .word 0xff00
332/* This table has been generated by divtab-sh4.c. */
/* div_table_clz[d] is loaded as a sign-extended byte and used as the shld
   count applied to the quotient estimate for divisor d (negative counts
   shift right).  This label is followed by 128 bytes, indices 0..127;
   index 128 is the first byte of div_table_ix, which holds the same
   value (-6). */
333 .balign 4
334div_table_clz:
335 .byte 0 /* d = 0 */
336 .byte 1 /* d = 1 */
337 .byte 0 /* d = 2 */
338 .byte -1 /* d = 3..4 */
339 .byte -1
340 .byte -2 /* d = 5..8 */
341 .byte -2
342 .byte -2
343 .byte -2
344 .byte -3 /* d = 9..16 */
345 .byte -3
346 .byte -3
347 .byte -3
348 .byte -3
349 .byte -3
350 .byte -3
351 .byte -3
352 .byte -4 /* d = 17..32 */
353 .byte -4
354 .byte -4
355 .byte -4
356 .byte -4
357 .byte -4
358 .byte -4
359 .byte -4
360 .byte -4
361 .byte -4
362 .byte -4
363 .byte -4
364 .byte -4
365 .byte -4
366 .byte -4
367 .byte -4
368 .byte -5 /* d = 33..64 */
369 .byte -5
370 .byte -5
371 .byte -5
372 .byte -5
373 .byte -5
374 .byte -5
375 .byte -5
376 .byte -5
377 .byte -5
378 .byte -5
379 .byte -5
380 .byte -5
381 .byte -5
382 .byte -5
383 .byte -5
384 .byte -5
385 .byte -5
386 .byte -5
387 .byte -5
388 .byte -5
389 .byte -5
390 .byte -5
391 .byte -5
392 .byte -5
393 .byte -5
394 .byte -5
395 .byte -5
396 .byte -5
397 .byte -5
398 .byte -5
399 .byte -5
400 .byte -6 /* d = 65..127 (d = 128 is the next table's first byte) */
401 .byte -6
402 .byte -6
403 .byte -6
404 .byte -6
405 .byte -6
406 .byte -6
407 .byte -6
408 .byte -6
409 .byte -6
410 .byte -6
411 .byte -6
412 .byte -6
413 .byte -6
414 .byte -6
415 .byte -6
416 .byte -6
417 .byte -6
418 .byte -6
419 .byte -6
420 .byte -6
421 .byte -6
422 .byte -6
423 .byte -6
424 .byte -6
425 .byte -6
426 .byte -6
427 .byte -6
428 .byte -6
429 .byte -6
430 .byte -6
431 .byte -6
432 .byte -6
433 .byte -6
434 .byte -6
435 .byte -6
436 .byte -6
437 .byte -6
438 .byte -6
439 .byte -6
440 .byte -6
441 .byte -6
442 .byte -6
443 .byte -6
444 .byte -6
445 .byte -6
446 .byte -6
447 .byte -6
448 .byte -6
449 .byte -6
450 .byte -6
451 .byte -6
452 .byte -6
453 .byte -6
454 .byte -6
455 .byte -6
456 .byte -6
457 .byte -6
458 .byte -6
459 .byte -6
460 .byte -6
461 .byte -6
462 .byte -6
463/* Lookup table translating positive divisor to index into table of
464 normalized inverse. N.B. the '0' entry is also the last entry of the
465 previous table, and causes an unaligned access for division by zero. */
/* div_table_ix[d], d = 0..128, is a signed byte offset that the code adds
   to the address of div_table_inv before a 32-bit load.  Offsets run from
   -128 to 124 in steps of 4, except the d = 0 entry (-6), which is not a
   multiple of 4.  Every power-of-two divisor maps to -128. */
466div_table_ix:
467 .byte -6 /* d = 0: misaligned on purpose; also div_table_clz[128] */
468 .byte -128 /* d = 1 */
469 .byte -128 /* d = 2 */
470 .byte 0
471 .byte -128 /* d = 4 */
472 .byte -64
473 .byte 0
474 .byte 64
475 .byte -128 /* d = 8 */
476 .byte -96
477 .byte -64
478 .byte -32
479 .byte 0
480 .byte 32
481 .byte 64
482 .byte 96
483 .byte -128 /* d = 16 */
484 .byte -112
485 .byte -96
486 .byte -80
487 .byte -64
488 .byte -48
489 .byte -32
490 .byte -16
491 .byte 0
492 .byte 16
493 .byte 32
494 .byte 48
495 .byte 64
496 .byte 80
497 .byte 96
498 .byte 112
499 .byte -128 /* d = 32 */
500 .byte -120
501 .byte -112
502 .byte -104
503 .byte -96
504 .byte -88
505 .byte -80
506 .byte -72
507 .byte -64
508 .byte -56
509 .byte -48
510 .byte -40
511 .byte -32
512 .byte -24
513 .byte -16
514 .byte -8
515 .byte 0
516 .byte 8
517 .byte 16
518 .byte 24
519 .byte 32
520 .byte 40
521 .byte 48
522 .byte 56
523 .byte 64
524 .byte 72
525 .byte 80
526 .byte 88
527 .byte 96
528 .byte 104
529 .byte 112
530 .byte 120
531 .byte -128 /* d = 64 */
532 .byte -124
533 .byte -120
534 .byte -116
535 .byte -112
536 .byte -108
537 .byte -104
538 .byte -100
539 .byte -96
540 .byte -92
541 .byte -88
542 .byte -84
543 .byte -80
544 .byte -76
545 .byte -72
546 .byte -68
547 .byte -64
548 .byte -60
549 .byte -56
550 .byte -52
551 .byte -48
552 .byte -44
553 .byte -40
554 .byte -36
555 .byte -32
556 .byte -28
557 .byte -24
558 .byte -20
559 .byte -16
560 .byte -12
561 .byte -8
562 .byte -4
563 .byte 0
564 .byte 4
565 .byte 8
566 .byte 12
567 .byte 16
568 .byte 20
569 .byte 24
570 .byte 28
571 .byte 32
572 .byte 36
573 .byte 40
574 .byte 44
575 .byte 48
576 .byte 52
577 .byte 56
578 .byte 60
579 .byte 64
580 .byte 68
581 .byte 72
582 .byte 76
583 .byte 80
584 .byte 84
585 .byte 88
586 .byte 92
587 .byte 96
588 .byte 100
589 .byte 104
590 .byte 108
591 .byte 112
592 .byte 116
593 .byte 120
594 .byte 124
595 .byte -128 /* d = 128 */
596/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
/* First 32 of the 64 inverse fractions.  They occupy the 128 bytes just
   below div_table_inv, so they are reached with the negative offsets
   -128..-4 from div_table_ix.  The first word is 0 and does double duty:
   it is the fraction used for power-of-two divisors (offset -128) and,
   under the label zero_l, the constant 0 loaded by the div1 paths to
   clear their scratch word. */
597 .balign 4
598zero_l:
599 .long 0x0
600 .long 0xF81F81F9
601 .long 0xF07C1F08
602 .long 0xE9131AC0
603 .long 0xE1E1E1E2
604 .long 0xDAE6076C
605 .long 0xD41D41D5
606 .long 0xCD856891
607 .long 0xC71C71C8
608 .long 0xC0E07039
609 .long 0xBACF914D
610 .long 0xB4E81B4F
611 .long 0xAF286BCB
612 .long 0xA98EF607
613 .long 0xA41A41A5
614 .long 0x9EC8E952
615 .long 0x9999999A
616 .long 0x948B0FCE
617 .long 0x8F9C18FA
618 .long 0x8ACB90F7
619 .long 0x86186187
620 .long 0x81818182
621 .long 0x7D05F418
622 .long 0x78A4C818
623 .long 0x745D1746
624 .long 0x702E05C1
625 .long 0x6C16C16D
626 .long 0x68168169
627 .long 0x642C8591
628 .long 0x60581606
629 .long 0x5C9882BA
630 .long 0x58ED2309
/* Base label for the inverse-fraction lookups: the code loads the 32-bit
   word at div_table_inv + div_table_ix[d].  The 32 words that follow are
   reached with offsets 0..124; the 32 words before this label (starting
   at zero_l) are reached with offsets -128..-4. */
631div_table_inv:
632 .long 0x55555556
633 .long 0x51D07EAF
634 .long 0x4E5E0A73
635 .long 0x4AFD6A06
636 .long 0x47AE147B
637 .long 0x446F8657
638 .long 0x41414142
639 .long 0x3E22CBCF
640 .long 0x3B13B13C
641 .long 0x38138139
642 .long 0x3521CFB3
643 .long 0x323E34A3
644 .long 0x2F684BDB
645 .long 0x2C9FB4D9
646 .long 0x29E4129F
647 .long 0x27350B89
648 .long 0x24924925
649 .long 0x21FB7813
650 .long 0x1F7047DD
651 .long 0x1CF06ADB
652 .long 0x1A7B9612
653 .long 0x18118119
654 .long 0x15B1E5F8
655 .long 0x135C8114
656 .long 0x11111112
657 .long 0xECF56BF
658 .long 0xC9714FC
659 .long 0xA6810A7
660 .long 0x8421085
661 .long 0x624DD30
662 .long 0x4104105
663 .long 0x2040811
664 /* maximum error: 0.987342 scaled: 0.921875*/