#include <linux/linkage.h>

#include <asm/asm.h>

/*
 * unsigned int __sw_hweight32(unsigned int w)
 * %rdi: w
 */
ENTRY(__sw_hweight32)

#ifdef CONFIG_X86_64
	movl %edi, %eax				# SysV 64-bit: arg arrives in %rdi; 32-bit callers pass it in %eax
#endif
	__ASM_SIZE(push,) %__ASM_REG(dx)	# save %edx/%rdx — this routine must clobber only %eax
	/* SWAR popcount, stage 1: each 2-bit field := popcount of its bits */
	movl %eax, %edx				# w -> t
	shrl %edx				# t >>= 1
	andl $0x55555555, %edx			# t &= 0x55555555
	subl %edx, %eax				# w -= t

	/* stage 2: each 4-bit field := sum of its two 2-bit counts */
	movl %eax, %edx				# w -> t
	shrl $2, %eax				# w_tmp >>= 2
	andl $0x33333333, %edx			# t &= 0x33333333
	andl $0x33333333, %eax			# w_tmp &= 0x33333333
	addl %edx, %eax				# w = w_tmp + t

	/* stage 3: each byte := sum of its two nibble counts, then
	 * multiply by 0x01010101 to accumulate all byte counts into
	 * the top byte and shift it down. */
	movl %eax, %edx				# w -> t
	shrl $4, %edx				# t >>= 4
	addl %edx, %eax				# w_tmp += t
	andl $0x0f0f0f0f, %eax			# w_tmp &= 0x0f0f0f0f
	imull $0x01010101, %eax, %eax		# w_tmp *= 0x01010101
	shrl $24, %eax				# w = w_tmp >> 24
	__ASM_SIZE(pop,) %__ASM_REG(dx)
	ret
ENDPROC(__sw_hweight32)
/*
 * unsigned long __sw_hweight64(__u64 w)
 *
 * 64-bit population count via the same SWAR algorithm as
 * __sw_hweight32, using 64-bit masks.
 *
 * CONFIG_X86_64: in %rdi = w, out %rax = hweight64(w);
 *                %rdi and %rdx are saved/restored, so only %rax
 *                is clobbered.
 * CONFIG_X86_32: in %eax = low 32 bits, %edx = high 32 bits;
 *                out %eax = hweight32(low) + hweight32(high).
 */
ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
	pushq   %rdi
	pushq   %rdx

	/* stage 1: each 2-bit field := popcount of its bits */
	movq    %rdi, %rdx                      # w -> t
	movabsq $0x5555555555555555, %rax
	shrq    %rdx                            # t >>= 1
	andq    %rdx, %rax                      # t &= 0x5555555555555555
	movabsq $0x3333333333333333, %rdx
	subq    %rax, %rdi                      # w -= t

	/* stage 2: each 4-bit field := sum of its two 2-bit counts */
	movq    %rdi, %rax                      # w -> t
	shrq    $2, %rdi                        # w_tmp >>= 2
	andq    %rdx, %rax                      # t &= 0x3333333333333333
	andq    %rdi, %rdx                      # w_tmp &= 0x3333333333333333
	addq    %rdx, %rax                      # w = w_tmp + t

	/* stage 3: byte counts, then fold all bytes into the top byte */
	movq    %rax, %rdx                      # w -> t
	shrq    $4, %rdx                        # t >>= 4
	addq    %rdx, %rax                      # w_tmp += t
	movabsq $0x0f0f0f0f0f0f0f0f, %rdx
	andq    %rdx, %rax                      # w_tmp &= 0x0f0f0f0f0f0f0f0f
	movabsq $0x0101010101010101, %rdx
	imulq   %rdx, %rax                      # w_tmp *= 0x0101010101010101
	shrq    $56, %rax                       # w = w_tmp >> 56

	popq    %rdx
	popq    %rdi
	ret
#else /* CONFIG_X86_32 */
	/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
	pushl   %ecx                            # preserve %ecx across the two helper calls

	call    __sw_hweight32                  # popcount of the low half (%eax)
	movl    %eax, %ecx                      # stash away result
	movl    %edx, %eax                      # second part of input
	call    __sw_hweight32                  # popcount of the high half
	addl    %ecx, %eax                      # result = low-half count + high-half count

	popl    %ecx
	ret
#endif
ENDPROC(__sw_hweight64)