blob: 02de3d74d2c5bb319d48503371f451672d66dc80 [file] [log] [blame]
Borislav Petkovf5967102016-05-30 12:56:27 +02001#include <linux/linkage.h>
2
3#include <asm/asm.h>
4
/*
 * unsigned int __sw_hweight32(unsigned int w)
 *
 * Software population count of a 32-bit word, done by summing bit fields
 * in parallel (2-bit, then 4-bit, then byte-wide partial counts).
 *
 * In:   %edi = w when CONFIG_X86_64 is set; otherwise w is read from %eax
 * Out:  %eax = number of set bits in w
 * Regs: the dx register is the only scratch register and is saved and
 *       restored on the stack, so nothing but %eax is modified on return.
 */
ENTRY(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl	%edi, %eax			# v = w
#endif
	/* Save the scratch register at native width (32- or 64-bit push). */
	__ASM_SIZE(push,) %__ASM_REG(dx)

	/* 2-bit fields: v -= (v >> 1) & 0x55555555 */
	movl	%eax, %edx			# s = v
	shrl	%edx				# s >>= 1
	andl	$0x55555555, %edx		# s &= 0x55555555
	subl	%edx, %eax			# v -= s

	/* 4-bit fields: v = (v & 0x33333333) + ((v >> 2) & 0x33333333) */
	movl	%eax, %edx			# s = v
	andl	$0x33333333, %edx		# s &= 0x33333333 (low pairs)
	shrl	$2, %eax			# v >>= 2
	andl	$0x33333333, %eax		# v &= 0x33333333 (high pairs)
	addl	%edx, %eax			# v += s

	/* Bytes: v = (v + (v >> 4)) & 0x0f0f0f0f */
	movl	%eax, %edx			# s = v
	shrl	$4, %edx			# s >>= 4
	addl	%edx, %eax			# v += s
	andl	$0x0f0f0f0f, %eax		# keep one count per byte

	/* The multiply accumulates all four byte counts into the top byte. */
	imull	$0x01010101, %eax, %eax		# v *= 0x01010101
	shrl	$24, %eax			# result = v >> 24

	__ASM_SIZE(pop,) %__ASM_REG(dx)
	ret
ENDPROC(__sw_hweight32)
35
/*
 * unsigned long __sw_hweight64(__u64 w)
 *
 * Software population count of a 64-bit value.
 *
 * In:   CONFIG_X86_64: %rdi = w
 *       otherwise:     w arrives as two 32-bit halves in (%eax,%edx)
 * Out:  %rax (%eax on 32-bit) = number of set bits in w
 *
 * Apart from the return register, every register this routine touches is
 * saved and restored.  That includes the input register %rdi on the
 * 64-bit path: the computation uses it as a working register, so it is
 * pushed on entry and popped before returning.
 * NOTE(review): call sites that reach this routine from inline asm and
 * list %rdi only as an input operand depend on it coming back intact -
 * confirm against the callers.
 */
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq	%rdi				# input is modified below, keep it for the caller
	pushq	%rdx

	movq	%rdi, %rdx			# w -> t
	movabsq	$0x5555555555555555, %rax
	shrq	%rdx				# t >>= 1
	andq	%rdx, %rax			# t &= 0x5555555555555555 (t now in %rax)
	movabsq	$0x3333333333333333, %rdx
	subq	%rax, %rdi			# w -= t

	movq	%rdi, %rax			# w -> t
	shrq	$2, %rdi			# w_tmp >>= 2
	andq	%rdx, %rax			# t     &= 0x3333333333333333
	andq	%rdi, %rdx			# w_tmp &= 0x3333333333333333 (w_tmp now in %rdx)
	addq	%rdx, %rax			# w = w_tmp + t

	movq	%rax, %rdx			# w -> t
	shrq	$4, %rdx			# t >>= 4
	addq	%rdx, %rax			# w_tmp += t
	movabsq	$0x0f0f0f0f0f0f0f0f, %rdx
	andq	%rdx, %rax			# w_tmp &= 0x0f0f0f0f0f0f0f0f
	movabsq	$0x0101010101010101, %rdx
	imulq	%rdx, %rax			# w_tmp *= 0x0101010101010101
	shrq	$56, %rax			# w = w_tmp >> 56

	popq	%rdx
	popq	%rdi				# restore in reverse push order
	ret
#else /* CONFIG_X86_32 */
	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl	%ecx

	call	__sw_hweight32			# count the half held in %eax
	movl	%eax, %ecx			# stash away result
	movl	%edx, %eax			# second part of input
	call	__sw_hweight32
	addl	%ecx, %eax			# result

	popl	%ecx
	ret
#endif
ENDPROC(__sw_hweight64)