blob: ee4ba0baabcc80c40d13ede42ccb204d4530c9d9 [file] [log] [blame]
Jens Axboe5e595512014-09-26 09:51:16 -06001#include <stdlib.h>
2#include <inttypes.h>
3
4#include "bloom.h"
5#include "../hash.h"
Jens Axboe02036c42014-09-26 13:30:00 -06006#include "../minmax.h"
Jens Axboeee4ceb72014-09-26 14:57:41 -06007#include "../crc/xxhash.h"
Jens Axboeb0e1bb92014-09-27 09:30:56 -06008#include "../crc/murmur3.h"
Jens Axboe91d97212014-09-27 21:28:47 -06009#include "../crc/crc32c.h"
10#include "../crc/fnv.h"
Jens Axboe5e595512014-09-26 09:51:16 -060011
/*
 * Bloom filter state.
 */
struct bloom {
	/* Number of addressable bits; hash values are reduced modulo this. */
	uint64_t nentries;

	/* Bit array, stored as 32-bit words. */
	uint32_t *map;
};
17
/* Number of bits held by one word of the bit map. */
#define BITS_PER_INDEX	(sizeof(uint32_t) * 8)
/* Mask that extracts the bit position inside one map word. */
#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
20
/*
 * One hash function used by the filter, together with the seed that is
 * handed to it on every call.
 */
struct bloom_hash {
	/* Passed as the third argument of fn. */
	unsigned int seed;
	/* Called as fn(buffer, length, seed); returns a 32-bit hash. */
	uint32_t (*fn)(const void *, uint32_t, uint32_t);
};
25
/*
 * Adapter giving fio_crc32c() the (buf, len, seed) signature required by
 * struct bloom_hash.
 */
static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
{
	/* fio_crc32c() is called with buf and len only; the seed is ignored. */
	(void) seed;

	return fio_crc32c(buf, len);
}
30
/*
 * Adapter giving fnv() the (buf, len, seed) signature required by
 * struct bloom_hash. All three arguments are forwarded unchanged.
 */
static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
{
	return fnv(buf, len, seed);
}
35
/* Seed shared by every entry of the hash table below. */
#define BLOOM_SEED	0x8989
37
Jens Axboeee4ceb72014-09-26 14:57:41 -060038struct bloom_hash hashes[] = {
39 {
Jens Axboe91d97212014-09-27 21:28:47 -060040 .seed = BLOOM_SEED,
Jens Axboeee4ceb72014-09-26 14:57:41 -060041 .fn = jhash,
42 },
43 {
Jens Axboe91d97212014-09-27 21:28:47 -060044 .seed = BLOOM_SEED,
Jens Axboeee4ceb72014-09-26 14:57:41 -060045 .fn = XXH32,
46 },
47 {
Jens Axboe91d97212014-09-27 21:28:47 -060048 .seed = BLOOM_SEED,
Jens Axboe67c1b7c2014-09-27 08:38:42 -060049 .fn = murmurhash3,
Jens Axboeee4ceb72014-09-26 14:57:41 -060050 },
Jens Axboe91d97212014-09-27 21:28:47 -060051 {
52 .seed = BLOOM_SEED,
53 .fn = bloom_crc32c,
54 },
55 {
56 .seed = BLOOM_SEED,
57 .fn = bloom_fnv,
58 },
Jens Axboeee4ceb72014-09-26 14:57:41 -060059};
60
/* Number of hash functions evaluated per entry. */
#define N_HASHES	5

/* Lower bound on the number of 32-bit words allocated for the bit map. */
#define MIN_ENTRIES	1073741824UL
64
Jens Axboe5e595512014-09-26 09:51:16 -060065struct bloom *bloom_new(uint64_t entries)
66{
67 struct bloom *b;
68 size_t no_uints;
69
Jens Axboe91d97212014-09-27 21:28:47 -060070 crc32c_intel_probe();
71
Jens Axboe5e595512014-09-26 09:51:16 -060072 b = malloc(sizeof(*b));
73 b->nentries = entries;
74 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
Jens Axboe02036c42014-09-26 13:30:00 -060075 no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
Jens Axboe5e595512014-09-26 09:51:16 -060076 b->map = calloc(no_uints, sizeof(uint32_t));
77 if (!b->map) {
78 free(b);
79 return NULL;
80 }
81
82 return b;
83}
84
85void bloom_free(struct bloom *b)
86{
87 free(b->map);
88 free(b);
89}
90
91static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
92 int set)
93{
Jens Axboeee4ceb72014-09-26 14:57:41 -060094 uint32_t hash[N_HASHES];
Jens Axboe5e595512014-09-26 09:51:16 -060095 int i, was_set;
96
Jens Axboeee4ceb72014-09-26 14:57:41 -060097 for (i = 0; i < N_HASHES; i++) {
98 hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
99 hash[i] = hash[i] % b->nentries;
100 }
Jens Axboe5e595512014-09-26 09:51:16 -0600101
102 was_set = 0;
103 for (i = 0; i < N_HASHES; i++) {
Jens Axboeee4ceb72014-09-26 14:57:41 -0600104 const unsigned int index = hash[i] / BITS_PER_INDEX;
105 const unsigned int bit = hash[i] & BITS_INDEX_MASK;
Jens Axboe5e595512014-09-26 09:51:16 -0600106
107 if (b->map[index] & (1U << bit))
108 was_set++;
109 if (set)
110 b->map[index] |= 1U << bit;
111 }
112
113 return was_set == N_HASHES;
114}
115
/*
 * Add an entry of 'nwords' 32-bit words to the filter.
 *
 * Returns non-zero if all of the entry's bits were already set before
 * this call, 0 if at least one of them was newly set.
 */
int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	return __bloom_check(b, data, nwords, 1);
}