// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// di_int __ashrdi3(di_int input, int count);

#ifdef __i386__
#ifdef __SSE2__

// ___ashrdi3 -- 64-bit arithmetic (sign-propagating) right shift, SSE2 variant.
//
// C prototype:  di_int __ashrdi3(di_int input, int count);
//
// On entry:      4(%esp) = low  32 bits of input
//                8(%esp) = high 32 bits of input
//               12(%esp) = count
// On return:    %edx:%eax = input >> count (high:low)
// Clobbers:     %xmm0, %xmm1, %xmm2, flags
//
// SSE2 only provides a logical 64-bit right shift, so the value is shifted
// logically and, for negative inputs, a mask of leading one bits is OR-ed in.
//
// NOTE(review): this variant and the GPR variant below only agree for
// 0 <= count <= 63; presumably that range is the caller's precondition.

.text
.align 4
.globl ___ashrdi3
___ashrdi3:
    movd        12(%esp),       %xmm2   // Load count
    movl        8(%esp),        %eax    // Load high word; used only for the sign test
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
    // Assemble the 64-bit input from two 32-bit loads (presumably to match
    // callers that stored the argument as two 32-bit halves).
    movd        4(%esp),        %xmm0
    movd        8(%esp),        %xmm1
    punpckldq   %xmm1,          %xmm0   // Load input
#else
    movq        4(%esp),        %xmm0   // Load input
#endif

    psrlq       %xmm2,          %xmm0   // unsigned shift input by count

    testl       %eax,           %eax    // check the sign-bit of the input
    jns         1f                      // early out for positive inputs

    // If the input is negative, we need to construct the shifted sign bit
    // to or into the result, as xmm does not have a signed right shift.
    pcmpeqb     %xmm1,          %xmm1   // -1ULL
    psrlq       $58,            %xmm1   // 0x3f
    pandn       %xmm1,          %xmm2   // ~count & 0x3f = 63 - count
    pcmpeqb     %xmm1,          %xmm1   // -1ULL
    psubq       %xmm1,          %xmm2   // (63 - count) - (-1) = 64 - count
    // For count == 0 the shift amount is 64, which yields zero, so no sign
    // bits are added to an unshifted value.
    psllq       %xmm2,          %xmm1   // -1 << (64 - count) = leading sign bits
    por         %xmm1,          %xmm0

    // Move the result back to the general purpose registers and return
1:  movd        %xmm0,          %eax    // low word of the result
    psrlq       $32,            %xmm0
    movd        %xmm0,          %edx    // high word of the result
    ret

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

// ___ashrdi3 -- 64-bit arithmetic (sign-propagating) right shift, GPR variant.
//
// C prototype:  di_int __ashrdi3(di_int input, int count);
//
// On entry:      4(%esp) = low  32 bits of input
//                8(%esp) = high 32 bits of input
//               12(%esp) = count
// On return:    %edx:%eax = input >> count (high:low)
// Clobbers:     %ecx, flags (%ebx is saved and restored)
//
// Three cases are distinguished using bit 5 and the low five bits of count:
//   count >= 32      : low = high >> (count - 32), high = sign fill
//   count == 0       : input is returned unchanged
//   0 < count < 32   : both words are shifted and the bits that fall out of
//                      the high word are merged into the low word

.text
.align 4
.globl ___ashrdi3
___ashrdi3:
    movl        12(%esp),       %ecx    // Load count
    movl        8(%esp),        %edx    // Load high
    movl        4(%esp),        %eax    // Load low

    testl       $0x20,          %ecx    // If count >= 32
    jnz         2f                      //    goto 2
    // A zero count must bypass the merge below: "32 - count" would wrap to a
    // shift by 0 and the whole high word would be OR-ed into the low word.
    testl       $0x1f,          %ecx    // If count == 0
    jz          1f                      //    goto 1

    pushl       %ebx                    // %ebx is used as scratch; preserve it
    movl        %edx,           %ebx    // copy high
    shrl        %cl,            %eax    // right shift low by count
    sarl        %cl,            %edx    // right shift high by count (arithmetic)
    neg         %cl                     // -count; shifts use %cl mod 32 = 32 - count
    shll        %cl,            %ebx    // left shift high by 32 - count
    orl         %ebx,           %eax    // or the result into the low word
    popl        %ebx
1:  ret

2:  movl        %edx,           %eax    // Move high to low
    sarl        $31,            %edx    // fill high with copies of the sign bit
    sarl        %cl,            %eax    // shift low by count - 32 (%cl mod 32)
    ret

#endif // __SSE2__
#endif // __i386__